Merge pull request #773 from broadinstitute/rhl_annotator_warning

Make annotators emit a warning if they can't be applied
This commit is contained in:
ldgauthier 2014-11-12 08:01:19 -05:00
commit bcac930aad
17 changed files with 387 additions and 150 deletions

View File

@ -51,13 +51,16 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
@ -91,6 +94,12 @@ import java.util.*;
*
*/
public class DepthPerSampleHC extends GenotypeAnnotation {
private final static Logger logger = Logger.getLogger(DepthPerSampleHC.class);
private boolean walkerIdentityCheckWarningLogged = false;
private boolean alleleLikelihoodMapWarningLogged = false;
private boolean alleleLikelihoodMapSubsetWarningLogged = false;
@Override
public void annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
@ -102,22 +111,42 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
return;
if (alleleLikelihoodMap == null )
throw new IllegalStateException("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller");
if ( !(walker instanceof HaplotypeCaller) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller");
walkerIdentityCheckWarningLogged = true;
}
return;
}
if (alleleLikelihoodMap == null ){
if ( !alleleLikelihoodMapWarningLogged ) {
logger.warn("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller");
alleleLikelihoodMapWarningLogged = true;
}
return;
}
// the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot
// differentiate between reads that align over the event but aren't informative vs. those that aren't even
// close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
int dp = 0;
if ( alleleLikelihoodMap.isEmpty() ) {
// there are no reads
} else {
// there are reads
if ( !alleleLikelihoodMap.isEmpty() ) {
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
if ( ! alleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet());
if ( !alleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) {
if ( !alleleLikelihoodMapSubsetWarningLogged ) {
logger.warn("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet());
alleleLikelihoodMapSubsetWarningLogged = true;
}
return;
}
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
@ -130,10 +159,12 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
}
}
@Override
public List<String> getKeyNames() {
return Collections.singletonList(VCFConstants.DEPTH_KEY);
}
@Override
public List<VCFFormatHeaderLine> getDescriptions() {
return Collections.singletonList(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
}

View File

@ -54,9 +54,9 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import cern.jet.math.Arithmetic;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.QualityUtils;
import htsjdk.variant.vcf.VCFHeaderLineType;
@ -94,7 +94,7 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
private static final int MIN_COUNT = 2;
private static final int MIN_COUNT = ARRAY_DIM;
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
@ -154,10 +154,12 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
return Collections.singletonMap(FS, value);
}
@Override
public List<String> getKeyNames() {
return Collections.singletonList(FS);
}
@Override
public List<VCFInfoHeaderLine> getDescriptions() {
return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
}
@ -168,15 +170,19 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
* @return the array used by the per-sample Strand Bias annotation
*/
public static List<Integer> getContingencyArray( final int[][] table ) {
if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
final List<Integer> list = new ArrayList<>(4); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
if(table.length != ARRAY_DIM || table[0].length != ARRAY_DIM) {
logger.warn("Expecting a " + ARRAY_DIM + "x" + ARRAY_DIM + " strand bias table.");
return null;
}
final List<Integer> list = new ArrayList<>(ARRAY_SIZE); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
list.add(table[0][0]);
list.add(table[0][1]);
list.add(table[1][0]);
list.add(table[1][1]);
return list;
}
private Double pValueForContingencyTable(int[][] originalTable) {
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
@ -231,9 +237,9 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE;
final int[][] normalized = new int[2][2];
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2; j++ )
final int[][] normalized = new int[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < ARRAY_DIM; i++ ) {
for ( int j = 0; j < ARRAY_DIM; j++ )
normalized[i][j] = (int)(table[i][j] / normalizationFactor);
}
@ -241,10 +247,10 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
}
private static int [][] copyContingencyTable(int [][] t) {
int[][] c = new int[2][2];
int[][] c = new int[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < 2; i++ )
for ( int j = 0; j < 2; j++ )
for ( int i = 0; i < ARRAY_DIM; i++ )
for ( int j = 0; j < ARRAY_DIM; j++ )
c[i][j] = t[i][j];
return c;
@ -270,21 +276,21 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
}
private static boolean rotateTable(int[][] table) {
table[0][0] -= 1;
table[1][0] += 1;
table[0][0]--;
table[1][0]++;
table[0][1] += 1;
table[1][1] -= 1;
table[0][1]++;
table[1][1]--;
return (table[0][0] >= 0 && table[1][1] >= 0);
}
private static boolean unrotateTable(int[][] table) {
table[0][0] += 1;
table[1][0] -= 1;
table[0][0]++;
table[1][0]--;
table[0][1] -= 1;
table[1][1] += 1;
table[0][1]--;
table[1][1]++;
return (table[0][1] >= 0 && table[1][0] >= 0);
}

View File

@ -50,7 +50,8 @@
*/
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
@ -86,17 +87,33 @@ import java.util.*;
* <p>HaplotypeCaller does not output this annotation because it already evaluates haplotype segregation internally. This annotation is only informative (and available) for variants called by Unified Genotyper.</p>
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static Logger logger = Logger.getLogger(HaplotypeScore.class);
private boolean walkerIdentityCheckWarningLogged = false;
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
private final static char REGEXP_WILDCARD = '.';
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can only call from UnifiedGenotyper
if ( !(walker instanceof UnifiedGenotyper) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Must be called from UnifiedGenotyper, not " + walker.getClass().getName());
else
logger.warn("Must be called from UnifiedGenotyper");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if (vc.isSNP() && stratifiedContexts != null)
return annotatePileup(ref, stratifiedContexts, vc);
else
@ -107,7 +124,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc) {
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
if (stratifiedContexts.isEmpty()) // empty means that call was made by someone else and we have no data here
return null;
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
@ -393,10 +410,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return mismatches - expected;
}
@Override
public List<String> getKeyNames() {
return Arrays.asList("HaplotypeScore");
}
@Override
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
}

View File

@ -52,13 +52,14 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.QualityUtils;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
@ -66,10 +67,7 @@ import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -82,17 +80,20 @@ import java.util.Map;
*
* <h3>Caveats</h3>
* <ul>
* <li>This annotation requires multiple samples and a valid pedigree file.</li>
* <li>This annotation requires multiple samples.</li>
* <li>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</li>
* <li>Low confidence genotypes are ignored, which may adversely affect HW ratios. More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is heterozygous or homozygous variant.</li>
* </ul>
*/
public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation {
private final static Logger logger = Logger.getLogger(HardyWeinberg.class);
private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
private boolean warningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
@ -101,8 +102,13 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) {
if ( !warningLogged ) {
logger.warn("Too few genotypes");
warningLogged = true;
}
return null;
}
int refCount = 0;
int hetCount = 0;
@ -136,7 +142,9 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
return map;
}
@Override
public List<String> getKeyNames() { return Arrays.asList("HW"); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation")); }
}

View File

@ -52,6 +52,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -85,10 +86,12 @@ import java.util.*;
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static Logger logger = Logger.getLogger(InbreedingCoeff.class);
private static final int MIN_SAMPLES = 10;
private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff";
private Set<String> founderIds;
private int sampleCount;
private boolean pedigreeCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
@ -98,10 +101,20 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
if(founderIds == null && walker != null)
if(founderIds == null && walker != null) {
founderIds = ((Walker) walker).getSampleDB().getFounderIds();
}
if ( founderIds == null || founderIds.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
else{
return makeCoeffAnnotation(vc);
}
}
protected double calculateIC(final VariantContext vc, final GenotypesContext genotypes) {
@ -124,7 +137,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
{
if (g.isHetNonRef()) {
//all likelihoods go to homCount
homCount += 1;
homCount++;
continue;
}

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -93,9 +93,12 @@ import java.util.*;
public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation {
private final static Logger logger = Logger.getLogger(MVLikelihoodRatio.class);
private MendelianViolation mendelianViolation = null;
public static final String MVLR_KEY = "MVLR";
private Set<Trio> trios;
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -103,15 +106,31 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( mendelianViolation == null ) {
// Must have a pedigree file
trios = ((Walker) walker).getSampleDB().getTrios();
if ( trios.size() > 0 ) {
if ( trios.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, mendelian violation annotation must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
}
}
Map<String,Object> attributeMap = new HashMap<String,Object>(1);
//double pNoMV = 1.0;
@ -131,9 +150,11 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
return attributeMap;
}
// return the descriptions used for the VCF INFO meta field
// return the names and descriptions used for the VCF INFO meta field
@Override
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }

View File

@ -51,20 +51,21 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.samples.Trio;
import org.broadinstitute.gatk.engine.walkers.Walker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -93,6 +94,8 @@ import java.util.*;
public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringAnnotation, ExperimentalAnnotation {
private final static Logger logger = Logger.getLogger(PossibleDeNovo.class);
private MendelianViolation mendelianViolation = null;
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
@ -101,6 +104,8 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
private final int flatNumberOfSamplesCutoff = 4;
private Set<Trio> trios;
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -108,15 +113,29 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( !(walker instanceof VariantAnnotator ) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( mendelianViolation == null ) {
trios = ((Walker) walker).getSampleDB().getTrios();
if ( trios.size() > 0 ) {
if ( trios.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {
throw new UserException("Possible de novos annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
}
}
final Map<String,Object> attributeMap = new HashMap<String,Object>(1);
boolean isHighConfDeNovo = false;
@ -152,8 +171,10 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
}
// return the descriptions used for the VCF INFO meta field
@Override
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }

View File

@ -51,22 +51,21 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -83,13 +82,29 @@ import java.util.Map;
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
private final static Logger logger = Logger.getLogger(SpanningDeletions.class);
private boolean walkerIdentityCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( stratifiedContexts.size() == 0 )
// Can only call from UnifiedGenotyper
if ( !(walker instanceof UnifiedGenotyper) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( stratifiedContexts.isEmpty() )
return null;
// not meaningful when we're at an indel location: deletions that start at location N are by definition called at the position N-1, and at position N-1
@ -111,7 +126,9 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
return map;
}
@Override
public List<String> getKeyNames() { return Arrays.asList("Dels"); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Dels", 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions")); }
}

View File

@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.log4j.Logger;
@ -77,34 +78,33 @@ import java.util.*;
*/
public abstract class StrandBiasTest extends InfoFieldAnnotation {
private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
private static boolean stratifiedPerReadAlleleLikelihoodMapWarningLogged = false;
private static boolean inputVariantContextWarningLogged = false;
private static boolean getTableFromSamplesWarningLogged = false;
private static boolean decodeSBBSWarningLogged = false;
protected static final int ARRAY_DIM = 2;
protected static final int ARRAY_SIZE = ARRAY_DIM * ARRAY_DIM;
@Override
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
boolean hasSBBSannotation = false;
// Does the VCF header contain strand bias (SB) by sample annotation?
for ( final VCFHeaderLine line : headerLines) {
if ( line instanceof VCFFormatHeaderLine) {
final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
hasSBBSannotation = true;
break;
}
}
}
if (hasSBBSannotation) {
logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
if ( formatline.getID().equals(VCFConstants.STRAND_BIAS_KEY) ) {
logger.warn("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
return;
}
boolean hasReads = toolkit.getReadsDataSource().getReaderIDs().size() > 0;
if (hasReads) {
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
return;
}
}
logger.info("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
// Are there reads from a SAM/BAM file?
if (toolkit.getReadsDataSource().getReaderIDs().isEmpty())
logger.warn("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
else
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
}
@Override
@ -115,35 +115,38 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final Map<String,AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// do not process if not a variant
if ( !vc.isVariant() )
return null;
// if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
if ( vc.hasGenotypes() ) {
boolean hasSB = false;
for (final Genotype g : vc.getGenotypes()) {
if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) {
hasSB = true;
break;
}
}
if (hasSB)
return calculateAnnotationFromGTfield(vc.getGenotypes());
}
}
}
//stratifiedContexts can come come from VariantAnnotator, but will be size 0 if no reads were provided
if (vc.isSNP() && stratifiedContexts != null && stratifiedContexts.size() > 0) {
// if a the variant is a snp and has stratified contexts, calculate the annotation from the stratified contexts
//stratifiedContexts can come come from VariantAnnotator, but will be empty if no reads were provided
if (vc.isSNP() && stratifiedContexts != null && !stratifiedContexts.isEmpty()) {
return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
}
// calculate the annotation from the stratified per read likelihood map
// stratifiedPerReadAllelelikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
}
else
else {
// for non-snp variants, we need per-read likelihoods.
// for snps, we can get same result from simple pileup
// for indels that do not have a computed strand bias (SB) or strand bias by sample (SBBS)
return null;
}
}
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);
@ -161,7 +164,13 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
*/
protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
if( genotypes == null ) {
if ( !getTableFromSamplesWarningLogged ) {
logger.warn("Genotypes cannot be null.");
getTableFromSamplesWarningLogged = true;
}
return null;
}
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
boolean foundData = false;
@ -195,10 +204,10 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final List<Allele> allAlts,
final int minQScoreToConsider,
final int minCount ) {
int[][] table = new int[2][2];
int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4];
final int[] myTable = new int[ARRAY_SIZE];
for (final PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
@ -229,16 +238,28 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc,
final int minCount) {
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
if( stratifiedPerReadAlleleLikelihoodMap == null ) {
if ( !stratifiedPerReadAlleleLikelihoodMapWarningLogged ) {
logger.warn("stratifiedPerReadAlleleLikelihoodMap cannot be null");
stratifiedPerReadAlleleLikelihoodMapWarningLogged = true;
}
return null;
}
if( vc == null ) {
if ( !inputVariantContextWarningLogged ) {
logger.warn("input vc cannot be null");
inputVariantContextWarningLogged = true;
}
return null;
}
final Allele ref = vc.getReference();
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
final List<Allele> allAlts = vc.getAlternateAlleles();
final int[][] table = new int[2][2];
final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
final int[] myTable = new int[4];
final int[] myTable = new int[ARRAY_SIZE];
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey();
@ -286,7 +307,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final boolean matchesAnyAlt = allAlts.contains(allele);
if ( matchesRef || matchesAnyAlt ) {
final int offset = matchesRef ? 0 : 2;
final int offset = matchesRef ? 0 : ARRAY_DIM;
if ( read.isStrandless() ) {
// a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1
@ -320,9 +341,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the array used by the per-sample Strand Bias annotation
*/
private static int[] encodeSBBS( final String string ) {
final int[] array = new int[4];
final int[] array = new int[ARRAY_SIZE];
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
for( int index = 0; index < 4; index++ ) {
for( int index = 0; index < ARRAY_SIZE; index++ ) {
array[index] = Integer.parseInt(tokenizer.nextToken());
}
return array;
@ -334,8 +355,14 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the table used by the StrandOddsRatio annotation
*/
private static int[][] decodeSBBS( final int[] array ) {
if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
final int[][] table = new int[2][2];
if(array.length != ARRAY_SIZE) {
if ( !decodeSBBSWarningLogged ) {
logger.warn("Expecting a length = " + ARRAY_SIZE + " strand bias array.");
decodeSBBSWarningLogged = true;
}
return null;
}
final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
table[0][0] = array[0];
table[0][1] = array[1];
table[1][0] = array[2];

View File

@ -162,9 +162,9 @@ public class StrandOddsRatio extends StrandBiasTest implements StandardAnnotatio
* @return the augmented table
*/
private static double[][] augmentContingencyTable(final int[][] table) {
double[][] augmentedTable = new double[2][2];
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2; j++ )
double[][] augmentedTable = new double[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < ARRAY_DIM; i++ ) {
for ( int j = 0; j < ARRAY_DIM; j++ )
augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT;
}

View File

@ -51,12 +51,15 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.collections.Pair;
@ -65,10 +68,7 @@ import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* Tandem repeat unit composition and counts per allele
@ -84,15 +84,29 @@ import java.util.Map;
*
*/
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
private final static Logger logger = Logger.getLogger(TandemRepeatAnnotator.class);
private static final String STR_PRESENT = "STR";
private static final String REPEAT_UNIT_KEY = "RU";
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
private boolean walkerIdentityCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) throws UserException {
// Can not be called from HaplotypeCaller
if ( walker instanceof HaplotypeCaller ) {
if ( !walkerIdentityCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, can not be called from HaplotypeCaller");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( !vc.isIndel())
return null;
@ -117,10 +131,12 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
@Override
public List<String> getKeyNames() {
return Arrays.asList(keyNames);
}
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
}

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -87,21 +87,41 @@ import java.util.*;
*/
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation {
private final static Logger logger = Logger.getLogger(TransmissionDisequilibriumTest.class);
private Set<Sample> trios = null;
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap){
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
// Get trios from the input pedigree file.
if ( trios == null ) {
if ( walker instanceof VariantAnnotator ) {
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
} else {
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
if (trios == null || trios.isEmpty()) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Transmission disequilibrium test annotation requires a valid ped file be passed in.");
pedigreeCheckWarningLogged = true;
}
return null;
}
}
@ -125,8 +145,10 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
}
// return the descriptions used for the VCF INFO meta field
@Override
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT

View File

@ -55,7 +55,6 @@ import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.PositionalBufferedStream;
import htsjdk.variant.variantcontext.Genotype;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import org.testng.Assert;
@ -95,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4f7ebd519451a776c1aa61493ff33943"));
Arrays.asList("92eb47332dd9d7ee7fbe3120dc39c594"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -103,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("8cd16a59e4697beb1c6d75d0b82c8cf5"));
Arrays.asList("c367bf7cebd7b26305f8d4736788aec8"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -129,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a4df0258a61170c74c85b3cd516c8153"));
Arrays.asList("098dcad8d90d90391755a0191c9db59c"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -137,7 +136,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("1554af900d1caee1d85824ee85e54398"));
Arrays.asList("f3bbfbc179d2e1bae49890f1e9dfde34"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -145,7 +144,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("11935c8d5cc5a170d06f0b624b31079f"));
Arrays.asList("7267450fc4d002f75a24ca17278e0950"));
executeTest("test exclude annotations", spec);
}
@ -153,7 +152,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("06b4127795a67bd26156cc1651f3a98b"));
Arrays.asList("18592c72d83ee84e1326acb999518c38"));
executeTest("test overwriting header", spec);
}
@ -271,7 +270,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
"--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " +
"-L 1:10001292-10012424",
1,
UserException.class
Arrays.asList("87cbf53c65ef4498b721f901f87f0161")
);
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
}
@ -309,10 +308,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testStrandBiasBySample() throws IOException {
// pipeline 1: create variant via HalotypeCaller with no default annotations
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
final File outputVCF = executeTest("testStrandBiasBySample", spec).getFirst().get(0);
// pipeline 2: create variant via HalotypeCaller; include StrandBiasBySample, exclude FisherStrand annotation
// re-Annotate the variant with VariantAnnotator using FisherStrand annotation
final String baseNoFS = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA FisherStrand -A StrandBiasBySample";
final WalkerTestSpec specNoFS = new WalkerTestSpec(baseNoFS, 1, Arrays.asList(""));
specNoFS.disableShadowBCF();

View File

@ -310,7 +310,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +
"-A SnpEff",
1,
UserException.class);
Arrays.asList("037ce3364668ee6527fba80c4f4bff95"));
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
}

View File

@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "846af1842d2d42e43ce87583d227667d");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "dc7906ed73dc071162c98e4bbe77df44");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {

View File

@ -367,7 +367,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fa83f82ba63729edd8696e82bfeea49"));
spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
}

View File

@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -58,6 +57,8 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private static Logger logger = Logger.getLogger(SnpEff.class);
private boolean canAnnotate = true;
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" };
@ -209,10 +210,15 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
}
}
@Override
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile):
validateRodBinding(walker.getSnpEffRodBinding());
if ( !isValidRodBinding(walker.getSnpEffRodBinding()) ) {
canAnnotate = false;
return;
}
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
@ -221,21 +227,40 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
checkSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine);
if ( !isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine) ) {
canAnnotate = false;
return;
}
// If everything looks ok, add the SnpEff version number and command-line header lines to the
// header of the VCF output file, changing the key names so that our output file won't be
// mistaken in the future for a SnpEff output file:
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
canAnnotate = false;
return;
}
}
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can not annotate if failed initialization conditions
if ( !canAnnotate )
return null;
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Get only SnpEff records that start at this locus, not merely span it:
@ -251,7 +276,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
if ( effects.size() == 0 ) {
if ( effects.isEmpty() ) {
return null;
}
@ -260,35 +285,42 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return mostSignificantEffect.getAnnotations();
}
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
private boolean isValidRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
logger.warn("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
"as a rodbinding on the command line via the --snpEffFile option, but " +
"no SnpEff rodbinding was found.");
return false;
}
return true;
}
private void checkSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ) {
private boolean isValidSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ){
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"and so could not verify that the file was generated by a supported version of SnpEff (%s)",
SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString()));
return false;
}
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"which should be added by all supported versions of SnpEff (%s)",
SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, supportedSnpEffVersionsString()));
return false;
}
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
if ( ! isSupportedSnpEffVersion(snpEffVersionString, snpEffCommandLine.getValue()) ) {
throw new UserException(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
logger.warn(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
"is not currently supported by the GATK, and was not run in GATK " +
"compatibility mode. Supported versions are: %s",
snpEffVersionString, supportedSnpEffVersionsString()));
return false;
}
return true;
}
private boolean isSupportedSnpEffVersion( final String versionString, final String commandLine ) {
@ -377,6 +409,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return mostSignificantEffect;
}
@Override
public List<String> getKeyNames() {
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
@ -390,6 +423,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
);
}
@Override
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),