Merge pull request #773 from broadinstitute/rhl_annotator_warning

Make annotators emit a warning if they can't be applied
This commit is contained in:
ldgauthier 2014-11-12 08:01:19 -05:00
commit bcac930aad
17 changed files with 387 additions and 150 deletions

View File

@ -51,13 +51,16 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
@ -91,6 +94,12 @@ import java.util.*;
*
*/
public class DepthPerSampleHC extends GenotypeAnnotation {
private final static Logger logger = Logger.getLogger(DepthPerSampleHC.class);
private boolean walkerIdentityCheckWarningLogged = false;
private boolean alleleLikelihoodMapWarningLogged = false;
private boolean alleleLikelihoodMapSubsetWarningLogged = false;
@Override
public void annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
@ -98,26 +107,46 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
final VariantContext vc,
final Genotype g,
final GenotypeBuilder gb,
final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
return;
if (alleleLikelihoodMap == null )
throw new IllegalStateException("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller");
if ( !(walker instanceof HaplotypeCaller) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller");
walkerIdentityCheckWarningLogged = true;
}
return;
}
if (alleleLikelihoodMap == null ){
if ( !alleleLikelihoodMapWarningLogged ) {
logger.warn("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller");
alleleLikelihoodMapWarningLogged = true;
}
return;
}
// the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot
// differentiate between reads that align over the event but aren't informative vs. those that aren't even
// close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
int dp = 0;
if ( alleleLikelihoodMap.isEmpty() ) {
// there are no reads
} else {
// there are reads
if ( !alleleLikelihoodMap.isEmpty() ) {
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
if ( ! alleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet());
if ( !alleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) {
if ( !alleleLikelihoodMapSubsetWarningLogged ) {
logger.warn("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet());
alleleLikelihoodMapSubsetWarningLogged = true;
}
return;
}
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
@ -130,10 +159,12 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
}
}
/** Returns the genotype (FORMAT) key this annotation writes: the shared depth key constant. */
@Override
public List<String> getKeyNames() {
return Collections.singletonList(VCFConstants.DEPTH_KEY);
}

/** Returns the VCF header FORMAT line for the depth key, reusing the standard predefined header line. */
@Override
public List<VCFFormatHeaderLine> getDescriptions() {
return Collections.singletonList(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
}

View File

@ -54,9 +54,9 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import cern.jet.math.Arithmetic;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.QualityUtils;
import htsjdk.variant.vcf.VCFHeaderLineType;
@ -94,7 +94,7 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
// INFO field key written by this annotation.
private static final String FS = "FS";
// Floor for the reported p-value, so the Phred-scaled value stays finite.
private static final double MIN_PVALUE = 1E-320;
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
// NOTE(review): the rendered diff showed both "= 2" and "= ARRAY_DIM" for this
// constant. The value is 2 either way; the literal is kept because a minimum
// count threshold is conceptually unrelated to the 2x2 table dimension, and
// tying it to ARRAY_DIM would couple two independent concepts by coincidence.
private static final int MIN_COUNT = 2;
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
@ -154,10 +154,12 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
return Collections.singletonMap(FS, value);
}
/** Returns the INFO key this annotation writes ("FS"). */
@Override
public List<String> getKeyNames() {
return Collections.singletonList(FS);
}

/** Returns the VCF header INFO line describing FS: the Phred-scaled Fisher's exact test p-value for strand bias. */
@Override
public List<VCFInfoHeaderLine> getDescriptions() {
return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
}
@ -168,15 +170,19 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
* @return the array used by the per-sample Strand Bias annotation
*/
/**
 * Flattens a 2x2 strand bias contingency table into the 4-element list used by
 * the per-sample strand bias annotation, in row-major order.
 *
 * @param table the ARRAY_DIM x ARRAY_DIM strand bias table
 * @return the flattened table, or null if the table does not have the expected
 *         dimensions (a warning is logged instead of throwing, so a malformed
 *         table skips the annotation rather than aborting the run)
 */
public static List<Integer> getContingencyArray( final int[][] table ) {
    if ( table.length != ARRAY_DIM || table[0].length != ARRAY_DIM ) {
        logger.warn("Expecting a " + ARRAY_DIM + "x" + ARRAY_DIM + " strand bias table.");
        return null;
    }
    final List<Integer> list = new ArrayList<>(ARRAY_SIZE); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
    list.add(table[0][0]);
    list.add(table[0][1]);
    list.add(table[1][0]);
    list.add(table[1][1]);
    return list;
}
private Double pValueForContingencyTable(int[][] originalTable) {
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
@ -231,9 +237,9 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE;
final int[][] normalized = new int[2][2];
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2; j++ )
final int[][] normalized = new int[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < ARRAY_DIM; i++ ) {
for ( int j = 0; j < ARRAY_DIM; j++ )
normalized[i][j] = (int)(table[i][j] / normalizationFactor);
}
@ -241,10 +247,10 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
}
private static int [][] copyContingencyTable(int [][] t) {
int[][] c = new int[2][2];
int[][] c = new int[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < 2; i++ )
for ( int j = 0; j < 2; j++ )
for ( int i = 0; i < ARRAY_DIM; i++ )
for ( int j = 0; j < ARRAY_DIM; j++ )
c[i][j] = t[i][j];
return c;
@ -270,21 +276,21 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
}
/**
 * Shifts one unit of count off the main diagonal of the 2x2 table in place
 * (decrement [0][0] and [1][1], increment [0][1] and [1][0]), preserving row
 * and column sums — presumably used to step through more extreme tables for
 * the exact test (TODO confirm against callers).
 *
 * @param table the 2x2 table to mutate in place
 * @return true while the decremented diagonal cells remain non-negative,
 *         i.e. the rotated table is still a valid count table
 */
private static boolean rotateTable(int[][] table) {
    table[0][0]--;
    table[1][0]++;
    table[0][1]++;
    table[1][1]--;
    return (table[0][0] >= 0 && table[1][1] >= 0);
}
/**
 * Inverse of rotateTable: shifts one unit of count back onto the main diagonal
 * in place (increment [0][0] and [1][1], decrement [0][1] and [1][0]),
 * preserving row and column sums.
 *
 * @param table the 2x2 table to mutate in place
 * @return true while the decremented off-diagonal cells remain non-negative,
 *         i.e. the un-rotated table is still a valid count table
 */
private static boolean unrotateTable(int[][] table) {
    table[0][0]++;
    table[1][0]--;
    table[0][1]--;
    table[1][1]++;
    return (table[0][1] >= 0 && table[1][0] >= 0);
}

View File

@ -50,7 +50,8 @@
*/
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
@ -86,17 +87,33 @@ import java.util.*;
* <p>HaplotypeCaller does not output this annotation because it already evaluates haplotype segregation internally. This annotation is only informative (and available) for variants called by Unified Genotyper.</p>
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static Logger logger = Logger.getLogger(HaplotypeScore.class);
private boolean walkerIdentityCheckWarningLogged = false;
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
private final static char REGEXP_WILDCARD = '.';
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can only call from UnifiedGenotyper
if ( !(walker instanceof UnifiedGenotyper) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Must be called from UnifiedGenotyper, not " + walker.getClass().getName());
else
logger.warn("Must be called from UnifiedGenotyper");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if (vc.isSNP() && stratifiedContexts != null)
return annotatePileup(ref, stratifiedContexts, vc);
else
@ -107,7 +124,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc) {
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
if (stratifiedContexts.isEmpty()) // empty means that call was made by someone else and we have no data here
return null;
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
@ -393,10 +410,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return mismatches - expected;
}
/** Returns the INFO key this annotation writes ("HaplotypeScore"). */
@Override
public List<String> getKeyNames() {
return Arrays.asList("HaplotypeScore");
}

/** Returns the VCF header INFO line describing the HaplotypeScore key. */
@Override
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
}

View File

@ -52,13 +52,14 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.QualityUtils;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
@ -66,10 +67,7 @@ import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -82,17 +80,20 @@ import java.util.Map;
*
* <h3>Caveats</h3>
* <ul>
* <li>This annotation requires multiple samples and a valid pedigree file.</li>
* <li>This annotation requires multiple samples.</li>
* <li>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</li>
* <li>Low confidence genotypes are ignored, which may adversely affect HW ratios. More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is heterozygous or homozygous variant.</li>
* </ul>
*/
public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation {
private final static Logger logger = Logger.getLogger(HardyWeinberg.class);
private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
private boolean warningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
@ -101,8 +102,13 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) {
if ( !warningLogged ) {
logger.warn("Too few genotypes");
warningLogged = true;
}
return null;
}
int refCount = 0;
int hetCount = 0;
@ -136,7 +142,9 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
return map;
}
/** Returns the INFO key this annotation writes ("HW"). */
@Override
public List<String> getKeyNames() { return Arrays.asList("HW"); }

/** Returns the VCF header INFO line describing the HW key (Phred-scaled Hardy-Weinberg violation p-value). */
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation")); }
}

View File

@ -52,6 +52,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -85,10 +86,12 @@ import java.util.*;
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static Logger logger = Logger.getLogger(InbreedingCoeff.class);
private static final int MIN_SAMPLES = 10;
private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff";
private Set<String> founderIds;
private int sampleCount;
private boolean pedigreeCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
@ -98,9 +101,19 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
if(founderIds == null && walker != null)
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
return makeCoeffAnnotation(vc);
if(founderIds == null && walker != null) {
founderIds = ((Walker) walker).getSampleDB().getFounderIds();
}
if ( founderIds == null || founderIds.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
else{
return makeCoeffAnnotation(vc);
}
}
protected double calculateIC(final VariantContext vc, final GenotypesContext genotypes) {
@ -124,7 +137,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
{
if (g.isHetNonRef()) {
//all likelihoods go to homCount
homCount += 1;
homCount++;
continue;
}

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -93,9 +93,12 @@ import java.util.*;
public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation {
private final static Logger logger = Logger.getLogger(MVLikelihoodRatio.class);
private MendelianViolation mendelianViolation = null;
public static final String MVLR_KEY = "MVLR";
private Set<Trio> trios;
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -103,14 +106,30 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( mendelianViolation == null ) {
// Must have a pedigree file
trios = ((Walker) walker).getSampleDB().getTrios();
if ( trios.size() > 0 ) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
if ( trios.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, mendelian violation annotation must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
Map<String,Object> attributeMap = new HashMap<String,Object>(1);
@ -131,9 +150,11 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
return attributeMap;
}
// return the descriptions used for the VCF INFO meta field
// return the names and descriptions used for the VCF INFO meta field
/** Returns the INFO key this annotation writes (MVLR_KEY). */
@Override
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }

/** Returns the VCF header INFO line describing the MVLR key (mendelian violation likelihood ratio). */
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }

View File

@ -51,20 +51,21 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.samples.Trio;
import org.broadinstitute.gatk.engine.walkers.Walker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -81,7 +82,7 @@ import java.util.*;
* <li>Only reports possible de novos for children whose genotypes have not been tagged as filtered (which is most appropriate if parent likelihoods
* have already been factored in using PhaseByTransmission).</li>
* <li>When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.</li>
* <li>This annotation can only be used from the Variant Annotator.If you attempt to use it from the UnifiedGenotyper, the run will fail with an error message to that effect. If you attempt to use it from the HaplotypeCaller, the run will complete successfully but the annotation will not be added to any variants.</li>
* <li>This annotation can only be used from the Variant Annotator. If you attempt to use it from the UnifiedGenotyper, the run will fail with an error message to that effect. If you attempt to use it from the HaplotypeCaller, the run will complete successfully but the annotation will not be added to any variants.</li>
* </ul>
*
* <h3>Related annotations</h3>
@ -93,6 +94,8 @@ import java.util.*;
public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringAnnotation, ExperimentalAnnotation {
private final static Logger logger = Logger.getLogger(PossibleDeNovo.class);
private MendelianViolation mendelianViolation = null;
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
@ -101,6 +104,8 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
private final int flatNumberOfSamplesCutoff = 4;
private Set<Trio> trios;
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -108,14 +113,28 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( !(walker instanceof VariantAnnotator ) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( mendelianViolation == null ) {
trios = ((Walker) walker).getSampleDB().getTrios();
if ( trios.size() > 0 ) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {
throw new UserException("Possible de novos annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
if ( trios.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
final Map<String,Object> attributeMap = new HashMap<String,Object>(1);
@ -152,8 +171,10 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
}
// return the descriptions used for the VCF INFO meta field
/** Returns both INFO keys this annotation writes: the high- and low-confidence de novo sample lists. */
@Override
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }

/** Returns the VCF header INFO lines for both de novo keys; the GQ thresholds are embedded in the description text. */
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }

View File

@ -51,22 +51,21 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -83,13 +82,29 @@ import java.util.Map;
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
private final static Logger logger = Logger.getLogger(SpanningDeletions.class);
private boolean walkerIdentityCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( stratifiedContexts.size() == 0 )
// Can only call from UnifiedGenotyper
if ( !(walker instanceof UnifiedGenotyper) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( stratifiedContexts.isEmpty() )
return null;
// not meaningful when we're at an indel location: deletions that start at location N are by definition called at the position N-1, and at position N-1
@ -111,7 +126,9 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
return map;
}
/** Returns the INFO key this annotation writes ("Dels"). */
@Override
public List<String> getKeyNames() { return Arrays.asList("Dels"); }

/** Returns the VCF header INFO line describing the Dels key (fraction of reads with spanning deletions). */
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Dels", 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions")); }
}

View File

@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.log4j.Logger;
@ -77,34 +78,33 @@ import java.util.*;
*/
public abstract class StrandBiasTest extends InfoFieldAnnotation {
private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
private static boolean stratifiedPerReadAlleleLikelihoodMapWarningLogged = false;
private static boolean inputVariantContextWarningLogged = false;
private static boolean getTableFromSamplesWarningLogged = false;
private static boolean decodeSBBSWarningLogged = false;
protected static final int ARRAY_DIM = 2;
protected static final int ARRAY_SIZE = ARRAY_DIM * ARRAY_DIM;
@Override
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
boolean hasSBBSannotation = false;
// Does the VCF header contain strand bias (SB) by sample annotation?
for ( final VCFHeaderLine line : headerLines) {
if ( line instanceof VCFFormatHeaderLine) {
final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
hasSBBSannotation = true;
break;
if ( formatline.getID().equals(VCFConstants.STRAND_BIAS_KEY) ) {
logger.warn("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
return;
}
}
}
if (hasSBBSannotation) {
logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
return;
}
boolean hasReads = toolkit.getReadsDataSource().getReaderIDs().size() > 0;
if (hasReads) {
// Are there reads from a SAM/BAM file?
if (toolkit.getReadsDataSource().getReaderIDs().isEmpty())
logger.warn("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
else
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
return;
}
logger.info("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
}
@Override
@ -115,34 +115,37 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final Map<String,AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// do not process if not a variant
if ( !vc.isVariant() )
return null;
// if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
if ( vc.hasGenotypes() ) {
boolean hasSB = false;
for (final Genotype g : vc.getGenotypes()) {
if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) {
hasSB = true;
break;
return calculateAnnotationFromGTfield(vc.getGenotypes());
}
}
if (hasSB)
return calculateAnnotationFromGTfield(vc.getGenotypes());
}
//stratifiedContexts can come from VariantAnnotator, but will be size 0 if no reads were provided
if (vc.isSNP() && stratifiedContexts != null && stratifiedContexts.size() > 0) {
// if a the variant is a snp and has stratified contexts, calculate the annotation from the stratified contexts
//stratifiedContexts can come from VariantAnnotator, but will be empty if no reads were provided
if (vc.isSNP() && stratifiedContexts != null && !stratifiedContexts.isEmpty()) {
return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
}
//stratifiedPerReadAlleleLikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine
// calculate the annotation from the stratified per read likelihood map
// stratifiedPerReadAlleleLikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
}
else
// for non-snp variants, we need per-read likelihoods.
else {
// for non-snp variants, we need per-read likelihoods.
// for snps, we can get same result from simple pileup
// for indels that do not have a computed strand bias (SB) or strand bias by sample (SBBS)
return null;
}
}
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);
@ -161,7 +164,13 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
*/
protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
if( genotypes == null ) {
if ( !getTableFromSamplesWarningLogged ) {
logger.warn("Genotypes cannot be null.");
getTableFromSamplesWarningLogged = true;
}
return null;
}
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
boolean foundData = false;
@ -195,10 +204,10 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final List<Allele> allAlts,
final int minQScoreToConsider,
final int minCount ) {
int[][] table = new int[2][2];
int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4];
final int[] myTable = new int[ARRAY_SIZE];
for (final PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
@ -229,16 +238,28 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc,
final int minCount) {
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
if( stratifiedPerReadAlleleLikelihoodMap == null ) {
if ( !stratifiedPerReadAlleleLikelihoodMapWarningLogged ) {
logger.warn("stratifiedPerReadAlleleLikelihoodMap cannot be null");
stratifiedPerReadAlleleLikelihoodMapWarningLogged = true;
}
return null;
}
if( vc == null ) {
if ( !inputVariantContextWarningLogged ) {
logger.warn("input vc cannot be null");
inputVariantContextWarningLogged = true;
}
return null;
}
final Allele ref = vc.getReference();
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
final List<Allele> allAlts = vc.getAlternateAlleles();
final int[][] table = new int[2][2];
final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
final int[] myTable = new int[4];
final int[] myTable = new int[ARRAY_SIZE];
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey();
@ -286,7 +307,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final boolean matchesAnyAlt = allAlts.contains(allele);
if ( matchesRef || matchesAnyAlt ) {
final int offset = matchesRef ? 0 : 2;
final int offset = matchesRef ? 0 : ARRAY_DIM;
if ( read.isStrandless() ) {
// a strandless read counts as observations on both strands, at 50% weight, with a minimum of 1
@ -320,9 +341,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the array used by the per-sample Strand Bias annotation
*/
private static int[] encodeSBBS( final String string ) {
final int[] array = new int[4];
final int[] array = new int[ARRAY_SIZE];
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
for( int index = 0; index < 4; index++ ) {
for( int index = 0; index < ARRAY_SIZE; index++ ) {
array[index] = Integer.parseInt(tokenizer.nextToken());
}
return array;
@ -334,8 +355,14 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the table used by the StrandOddsRatio annotation
*/
private static int[][] decodeSBBS( final int[] array ) {
if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
final int[][] table = new int[2][2];
if(array.length != ARRAY_SIZE) {
if ( !decodeSBBSWarningLogged ) {
logger.warn("Expecting a length = " + ARRAY_SIZE + " strand bias array.");
decodeSBBSWarningLogged = true;
}
return null;
}
final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
table[0][0] = array[0];
table[0][1] = array[1];
table[1][0] = array[2];

View File

@ -162,9 +162,9 @@ public class StrandOddsRatio extends StrandBiasTest implements StandardAnnotatio
* @return the augmented table
*/
private static double[][] augmentContingencyTable(final int[][] table) {
double[][] augmentedTable = new double[2][2];
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2; j++ )
double[][] augmentedTable = new double[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < ARRAY_DIM; i++ ) {
for ( int j = 0; j < ARRAY_DIM; j++ )
augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT;
}

View File

@ -51,12 +51,15 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.collections.Pair;
@ -65,10 +68,7 @@ import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* Tandem repeat unit composition and counts per allele
@ -84,15 +84,29 @@ import java.util.Map;
*
*/
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
private final static Logger logger = Logger.getLogger(TandemRepeatAnnotator.class);
private static final String STR_PRESENT = "STR";
private static final String REPEAT_UNIT_KEY = "RU";
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
private boolean walkerIdentityCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) throws UserException {
// Can not be called from HaplotypeCaller
if ( walker instanceof HaplotypeCaller ) {
if ( !walkerIdentityCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, can not be called from HaplotypeCaller");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( !vc.isIndel())
return null;
@ -117,10 +131,12 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
@Override
public List<String> getKeyNames() {
return Arrays.asList(keyNames);
}
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
}

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -87,21 +87,41 @@ import java.util.*;
*/
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation {
private final static Logger logger = Logger.getLogger(TransmissionDisequilibriumTest.class);
private Set<Sample> trios = null;
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap){
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
// Get trios from the input pedigree file.
if ( trios == null ) {
if ( walker instanceof VariantAnnotator ) {
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
} else {
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
if (trios == null || trios.isEmpty()) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Transmission disequilibrium test annotation requires a valid ped file be passed in.");
pedigreeCheckWarningLogged = true;
}
return null;
}
}
@ -125,8 +145,10 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
}
// return the descriptions used for the VCF INFO meta field
@Override
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT

View File

@ -55,7 +55,6 @@ import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.PositionalBufferedStream;
import htsjdk.variant.variantcontext.Genotype;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import org.testng.Assert;
@ -95,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4f7ebd519451a776c1aa61493ff33943"));
Arrays.asList("92eb47332dd9d7ee7fbe3120dc39c594"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -103,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("8cd16a59e4697beb1c6d75d0b82c8cf5"));
Arrays.asList("c367bf7cebd7b26305f8d4736788aec8"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -129,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a4df0258a61170c74c85b3cd516c8153"));
Arrays.asList("098dcad8d90d90391755a0191c9db59c"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -137,7 +136,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("1554af900d1caee1d85824ee85e54398"));
Arrays.asList("f3bbfbc179d2e1bae49890f1e9dfde34"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -145,7 +144,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("11935c8d5cc5a170d06f0b624b31079f"));
Arrays.asList("7267450fc4d002f75a24ca17278e0950"));
executeTest("test exclude annotations", spec);
}
@ -153,7 +152,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("06b4127795a67bd26156cc1651f3a98b"));
Arrays.asList("18592c72d83ee84e1326acb999518c38"));
executeTest("test overwriting header", spec);
}
@ -271,7 +270,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
"--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " +
"-L 1:10001292-10012424",
1,
UserException.class
Arrays.asList("87cbf53c65ef4498b721f901f87f0161")
);
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
}
@ -309,10 +308,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testStrandBiasBySample() throws IOException {
// pipeline 1: create variant via HaplotypeCaller with no default annotations
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
final File outputVCF = executeTest("testStrandBiasBySample", spec).getFirst().get(0);
// pipeline 2: create variant via HaplotypeCaller; include StrandBiasBySample, exclude FisherStrand annotation
// re-Annotate the variant with VariantAnnotator using FisherStrand annotation
final String baseNoFS = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA FisherStrand -A StrandBiasBySample";
final WalkerTestSpec specNoFS = new WalkerTestSpec(baseNoFS, 1, Arrays.asList(""));
specNoFS.disableShadowBCF();

View File

@ -310,7 +310,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +
"-A SnpEff",
1,
UserException.class);
Arrays.asList("037ce3364668ee6527fba80c4f4bff95"));
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
}

View File

@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "846af1842d2d42e43ce87583d227667d");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "dc7906ed73dc071162c98e4bbe77df44");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {

View File

@ -367,7 +367,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fa83f82ba63729edd8696e82bfeea49"));
spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
}

View File

@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
@ -58,6 +57,8 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private static Logger logger = Logger.getLogger(SnpEff.class);
private boolean canAnnotate = true;
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" };
@ -209,10 +210,15 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
}
}
@Override
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile):
validateRodBinding(walker.getSnpEffRodBinding());
if ( !isValidRodBinding(walker.getSnpEffRodBinding()) ) {
canAnnotate = false;
return;
}
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
@ -221,21 +227,40 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
checkSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine);
if ( !isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine) ) {
canAnnotate = false;
return;
}
// If everything looks ok, add the SnpEff version number and command-line header lines to the
// header of the VCF output file, changing the key names so that our output file won't be
// mistaken in the future for a SnpEff output file:
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
canAnnotate = false;
return;
}
}
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can not annotate if failed initialization conditions
if ( !canAnnotate )
return null;
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Get only SnpEff records that start at this locus, not merely span it:
@ -251,7 +276,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
if ( effects.size() == 0 ) {
if ( effects.isEmpty() ) {
return null;
}
@ -260,35 +285,42 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return mostSignificantEffect.getAnnotations();
}
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
private boolean isValidRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
logger.warn("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
"as a rodbinding on the command line via the --snpEffFile option, but " +
"no SnpEff rodbinding was found.");
return false;
}
return true;
}
private void checkSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ) {
private boolean isValidSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ){
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"and so could not verify that the file was generated by a supported version of SnpEff (%s)",
SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString()));
logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"and so could not verify that the file was generated by a supported version of SnpEff (%s)",
SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString()));
return false;
}
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"which should be added by all supported versions of SnpEff (%s)",
SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, supportedSnpEffVersionsString()));
return false;
}
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
if ( ! isSupportedSnpEffVersion(snpEffVersionString, snpEffCommandLine.getValue()) ) {
throw new UserException(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
"is not currently supported by the GATK, and was not run in GATK " +
"compatibility mode. Supported versions are: %s",
snpEffVersionString, supportedSnpEffVersionsString()));
logger.warn(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
"is not currently supported by the GATK, and was not run in GATK " +
"compatibility mode. Supported versions are: %s",
snpEffVersionString, supportedSnpEffVersionsString()));
return false;
}
return true;
}
private boolean isSupportedSnpEffVersion( final String versionString, final String commandLine ) {
@ -377,6 +409,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return mostSignificantEffect;
}
@Override
public List<String> getKeyNames() {
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
@ -390,6 +423,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
);
}
@Override
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),