Merge pull request #773 from broadinstitute/rhl_annotator_warning

Make annotators emit a warning if they can't be applied
This commit is contained in:
ldgauthier 2014-11-12 08:01:19 -05:00
commit bcac930aad
17 changed files with 387 additions and 150 deletions

View File

@@ -51,13 +51,16 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele; import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.Genotype;
@@ -91,6 +94,12 @@ import java.util.*;
* *
*/ */
public class DepthPerSampleHC extends GenotypeAnnotation { public class DepthPerSampleHC extends GenotypeAnnotation {
private final static Logger logger = Logger.getLogger(DepthPerSampleHC.class);
private boolean walkerIdentityCheckWarningLogged = false;
private boolean alleleLikelihoodMapWarningLogged = false;
private boolean alleleLikelihoodMapSubsetWarningLogged = false;
@Override
public void annotate(final RefMetaDataTracker tracker, public void annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
@@ -102,22 +111,42 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) ) if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
return; return;
if (alleleLikelihoodMap == null ) if ( !(walker instanceof HaplotypeCaller) ) {
throw new IllegalStateException("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller"); if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller");
walkerIdentityCheckWarningLogged = true;
}
return;
}
if (alleleLikelihoodMap == null ){
if ( !alleleLikelihoodMapWarningLogged ) {
logger.warn("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller");
alleleLikelihoodMapWarningLogged = true;
}
return;
}
// the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot // the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot
// differentiate between reads that align over the event but aren't informative vs. those that aren't even // differentiate between reads that align over the event but aren't informative vs. those that aren't even
// close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP). // close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
int dp = 0; int dp = 0;
if ( alleleLikelihoodMap.isEmpty() ) { // there are reads
// there are no reads if ( !alleleLikelihoodMap.isEmpty() ) {
} else {
final Set<Allele> alleles = new HashSet<>(vc.getAlleles()); final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext // make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
if ( ! alleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) if ( !alleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) {
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet()); if ( !alleleLikelihoodMapSubsetWarningLogged ) {
logger.warn("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet());
alleleLikelihoodMapSubsetWarningLogged = true;
}
return;
}
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles); final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
@@ -130,10 +159,12 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
} }
} }
@Override
public List<String> getKeyNames() { public List<String> getKeyNames() {
return Collections.singletonList(VCFConstants.DEPTH_KEY); return Collections.singletonList(VCFConstants.DEPTH_KEY);
} }
@Override
public List<VCFFormatHeaderLine> getDescriptions() { public List<VCFFormatHeaderLine> getDescriptions() {
return Collections.singletonList(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY)); return Collections.singletonList(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
} }

View File

@@ -54,9 +54,9 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import cern.jet.math.Arithmetic; import cern.jet.math.Arithmetic;
import htsjdk.variant.variantcontext.GenotypesContext; import htsjdk.variant.variantcontext.GenotypesContext;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.QualityUtils; import org.broadinstitute.gatk.utils.QualityUtils;
import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFHeaderLineType;
@@ -94,7 +94,7 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
private static final String FS = "FS"; private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320; private static final double MIN_PVALUE = 1E-320;
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17; private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
private static final int MIN_COUNT = 2; private static final int MIN_COUNT = ARRAY_DIM;
@Override @Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){ protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
@@ -154,10 +154,12 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
return Collections.singletonMap(FS, value); return Collections.singletonMap(FS, value);
} }
@Override
public List<String> getKeyNames() { public List<String> getKeyNames() {
return Collections.singletonList(FS); return Collections.singletonList(FS);
} }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { public List<VCFInfoHeaderLine> getDescriptions() {
return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias")); return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
} }
@@ -168,15 +170,19 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
* @return the array used by the per-sample Strand Bias annotation * @return the array used by the per-sample Strand Bias annotation
*/ */
public static List<Integer> getContingencyArray( final int[][] table ) { public static List<Integer> getContingencyArray( final int[][] table ) {
if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } if(table.length != ARRAY_DIM || table[0].length != ARRAY_DIM) {
if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } logger.warn("Expecting a " + ARRAY_DIM + "x" + ARRAY_DIM + " strand bias table.");
final List<Integer> list = new ArrayList<>(4); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change return null;
}
final List<Integer> list = new ArrayList<>(ARRAY_SIZE); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
list.add(table[0][0]); list.add(table[0][0]);
list.add(table[0][1]); list.add(table[0][1]);
list.add(table[1][0]); list.add(table[1][0]);
list.add(table[1][1]); list.add(table[1][1]);
return list; return list;
} }
private Double pValueForContingencyTable(int[][] originalTable) { private Double pValueForContingencyTable(int[][] originalTable) {
final int[][] normalizedTable = normalizeContingencyTable(originalTable); final int[][] normalizedTable = normalizeContingencyTable(originalTable);
@@ -231,9 +237,9 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE; final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE;
final int[][] normalized = new int[2][2]; final int[][] normalized = new int[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < 2; i++ ) { for ( int i = 0; i < ARRAY_DIM; i++ ) {
for ( int j = 0; j < 2; j++ ) for ( int j = 0; j < ARRAY_DIM; j++ )
normalized[i][j] = (int)(table[i][j] / normalizationFactor); normalized[i][j] = (int)(table[i][j] / normalizationFactor);
} }
@@ -241,10 +247,10 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
} }
private static int [][] copyContingencyTable(int [][] t) { private static int [][] copyContingencyTable(int [][] t) {
int[][] c = new int[2][2]; int[][] c = new int[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < 2; i++ ) for ( int i = 0; i < ARRAY_DIM; i++ )
for ( int j = 0; j < 2; j++ ) for ( int j = 0; j < ARRAY_DIM; j++ )
c[i][j] = t[i][j]; c[i][j] = t[i][j];
return c; return c;
@@ -270,21 +276,21 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
} }
private static boolean rotateTable(int[][] table) { private static boolean rotateTable(int[][] table) {
table[0][0] -= 1; table[0][0]--;
table[1][0] += 1; table[1][0]++;
table[0][1] += 1; table[0][1]++;
table[1][1] -= 1; table[1][1]--;
return (table[0][0] >= 0 && table[1][1] >= 0); return (table[0][0] >= 0 && table[1][1] >= 0);
} }
private static boolean unrotateTable(int[][] table) { private static boolean unrotateTable(int[][] table) {
table[0][0] += 1; table[0][0]++;
table[1][0] -= 1; table[1][0]--;
table[0][1] -= 1; table[0][1]--;
table[1][1] += 1; table[1][1]++;
return (table[0][1] >= 0 && table[1][0] >= 0); return (table[0][1] >= 0 && table[1][0] >= 0);
} }

View File

@@ -50,7 +50,8 @@
*/ */
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils; import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
@@ -86,17 +87,33 @@ import java.util.*;
* <p>HaplotypeCaller does not output this annotation because it already evaluates haplotype segregation internally. This annotation is only informative (and available) for variants called by Unified Genotyper.</p> * <p>HaplotypeCaller does not output this annotation because it already evaluates haplotype segregation internally. This annotation is only informative (and available) for variants called by Unified Genotyper.</p>
*/ */
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static Logger logger = Logger.getLogger(HaplotypeScore.class);
private boolean walkerIdentityCheckWarningLogged = false;
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
private final static char REGEXP_WILDCARD = '.'; private final static char REGEXP_WILDCARD = '.';
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can only call from UnifiedGenotyper
if ( !(walker instanceof UnifiedGenotyper) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Must be called from UnifiedGenotyper, not " + walker.getClass().getName());
else
logger.warn("Must be called from UnifiedGenotyper");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if (vc.isSNP() && stratifiedContexts != null) if (vc.isSNP() && stratifiedContexts != null)
return annotatePileup(ref, stratifiedContexts, vc); return annotatePileup(ref, stratifiedContexts, vc);
else else
@@ -107,7 +124,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc) { final VariantContext vc) {
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here if (stratifiedContexts.isEmpty()) // empty means that call was made by someone else and we have no data here
return null; return null;
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values()); final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
@@ -393,10 +410,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return mismatches - expected; return mismatches - expected;
} }
@Override
public List<String> getKeyNames() { public List<String> getKeyNames() {
return Arrays.asList("HaplotypeScore"); return Arrays.asList("HaplotypeScore");
} }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes")); return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
} }

View File

@@ -52,13 +52,14 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation; import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.QualityUtils; import org.broadinstitute.gatk.utils.QualityUtils;
import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine;
@@ -66,10 +67,7 @@ import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext; import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays; import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
@@ -82,17 +80,20 @@ import java.util.Map;
* *
* <h3>Caveats</h3> * <h3>Caveats</h3>
* <ul> * <ul>
* <li>This annotation requires multiple samples and a valid pedigree file.</li> * <li>This annotation requires multiple samples.</li>
* <li>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</li> * <li>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</li>
* <li>Low confidence genotypes are ignored, which may adversely affect HW ratios. More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is heterozygous or homozygous variant.</li> * <li>Low confidence genotypes are ignored, which may adversely affect HW ratios. More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is heterozygous or homozygous variant.</li>
* </ul> * </ul>
*/ */
public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation { public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation {
private final static Logger logger = Logger.getLogger(HardyWeinberg.class);
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10; private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
private boolean warningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
@@ -101,8 +102,13 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
final GenotypesContext genotypes = vc.getGenotypes(); final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) {
if ( !warningLogged ) {
logger.warn("Too few genotypes");
warningLogged = true;
}
return null; return null;
}
int refCount = 0; int refCount = 0;
int hetCount = 0; int hetCount = 0;
@@ -136,7 +142,9 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
return map; return map;
} }
@Override
public List<String> getKeyNames() { return Arrays.asList("HW"); } public List<String> getKeyNames() { return Arrays.asList("HW"); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation")); }
} }

View File

@@ -52,6 +52,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@@ -85,10 +86,12 @@ import java.util.*;
*/ */
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static Logger logger = Logger.getLogger(InbreedingCoeff.class);
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;
private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff"; private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff";
private Set<String> founderIds; private Set<String> founderIds;
private int sampleCount; private int sampleCount;
private boolean pedigreeCheckWarningLogged = false;
@Override @Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
@@ -98,10 +101,20 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) { final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
//If available, get the founder IDs and cache them. the IC will only be computed on founders then. //If available, get the founder IDs and cache them. the IC will only be computed on founders then.
if(founderIds == null && walker != null) if(founderIds == null && walker != null) {
founderIds = ((Walker) walker).getSampleDB().getFounderIds(); founderIds = ((Walker) walker).getSampleDB().getFounderIds();
}
if ( founderIds == null || founderIds.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
else{
return makeCoeffAnnotation(vc); return makeCoeffAnnotation(vc);
} }
}
protected double calculateIC(final VariantContext vc, final GenotypesContext genotypes) { protected double calculateIC(final VariantContext vc, final GenotypesContext genotypes) {
@@ -124,7 +137,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
{ {
if (g.isHetNonRef()) { if (g.isHetNonRef()) {
//all likelihoods go to homCount //all likelihoods go to homCount
homCount += 1; homCount++;
continue; continue;
} }

View File

@@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.engine.samples.MendelianViolation; import org.broadinstitute.gatk.engine.samples.MendelianViolation;
import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@@ -93,9 +93,12 @@ import java.util.*;
public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation { public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation {
private final static Logger logger = Logger.getLogger(MVLikelihoodRatio.class);
private MendelianViolation mendelianViolation = null; private MendelianViolation mendelianViolation = null;
public static final String MVLR_KEY = "MVLR"; public static final String MVLR_KEY = "MVLR";
private Set<Trio> trios; private Set<Trio> trios;
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
@@ -103,15 +106,31 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( mendelianViolation == null ) { if ( mendelianViolation == null ) {
// Must have a pedigree file
trios = ((Walker) walker).getSampleDB().getTrios(); trios = ((Walker) walker).getSampleDB().getTrios();
if ( trios.size() > 0 ) { if ( trios.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, mendelian violation annotation must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
} }
else {
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
}
}
Map<String,Object> attributeMap = new HashMap<String,Object>(1); Map<String,Object> attributeMap = new HashMap<String,Object>(1);
//double pNoMV = 1.0; //double pNoMV = 1.0;
@@ -131,9 +150,11 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
return attributeMap; return attributeMap;
} }
// return the descriptions used for the VCF INFO meta field // return the names and descriptions used for the VCF INFO meta field
@Override
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); } public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }

View File

@@ -51,20 +51,21 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.samples.Trio; import org.broadinstitute.gatk.engine.samples.Trio;
import org.broadinstitute.gatk.engine.walkers.Walker; import org.broadinstitute.gatk.engine.walkers.Walker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.samples.MendelianViolation; import org.broadinstitute.gatk.engine.samples.MendelianViolation;
import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@ -93,6 +94,8 @@ import java.util.*;
public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringAnnotation, ExperimentalAnnotation { public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringAnnotation, ExperimentalAnnotation {
private final static Logger logger = Logger.getLogger(PossibleDeNovo.class);
private MendelianViolation mendelianViolation = null; private MendelianViolation mendelianViolation = null;
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo"; public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo"; public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
@ -101,6 +104,8 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
private final int flatNumberOfSamplesCutoff = 4; private final int flatNumberOfSamplesCutoff = 4;
private Set<Trio> trios; private Set<Trio> trios;
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
@ -108,15 +113,29 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( !(walker instanceof VariantAnnotator ) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( mendelianViolation == null ) { if ( mendelianViolation == null ) {
trios = ((Walker) walker).getSampleDB().getTrios(); trios = ((Walker) walker).getSampleDB().getTrios();
if ( trios.size() > 0 ) { if ( trios.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
pedigreeCheckWarningLogged = true;
}
return null;
}
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
} }
else {
throw new UserException("Possible de novos annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
}
}
final Map<String,Object> attributeMap = new HashMap<String,Object>(1); final Map<String,Object> attributeMap = new HashMap<String,Object>(1);
boolean isHighConfDeNovo = false; boolean isHighConfDeNovo = false;
@ -152,8 +171,10 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
} }
// return the descriptions used for the VCF INFO meta field // return the descriptions used for the VCF INFO meta field
@Override
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); } public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"), public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); } new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }

View File

@ -51,22 +51,21 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays; import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
@ -83,13 +82,29 @@ import java.util.Map;
*/ */
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
private final static Logger logger = Logger.getLogger(SpanningDeletions.class);
private boolean walkerIdentityCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( stratifiedContexts.size() == 0 ) // Can only call from UnifiedGenotyper
if ( !(walker instanceof UnifiedGenotyper) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( stratifiedContexts.isEmpty() )
return null; return null;
// not meaningful when we're at an indel location: deletions that start at location N are by definition called at the position N-1, and at position N-1 // not meaningful when we're at an indel location: deletions that start at location N are by definition called at the position N-1, and at position N-1
@ -111,7 +126,9 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
return map; return map;
} }
@Override
public List<String> getKeyNames() { return Arrays.asList("Dels"); } public List<String> getKeyNames() { return Arrays.asList("Dels"); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Dels", 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Dels", 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions")); }
} }

View File

@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFormatHeaderLine; import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@ -77,34 +78,33 @@ import java.util.*;
*/ */
public abstract class StrandBiasTest extends InfoFieldAnnotation { public abstract class StrandBiasTest extends InfoFieldAnnotation {
private final static Logger logger = Logger.getLogger(StrandBiasTest.class); private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
private static boolean stratifiedPerReadAlleleLikelihoodMapWarningLogged = false;
private static boolean inputVariantContextWarningLogged = false;
private static boolean getTableFromSamplesWarningLogged = false;
private static boolean decodeSBBSWarningLogged = false;
protected static final int ARRAY_DIM = 2;
protected static final int ARRAY_SIZE = ARRAY_DIM * ARRAY_DIM;
@Override @Override
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) { public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
boolean hasSBBSannotation = false; // Does the VCF header contain strand bias (SB) by sample annotation?
for ( final VCFHeaderLine line : headerLines) { for ( final VCFHeaderLine line : headerLines) {
if ( line instanceof VCFFormatHeaderLine) { if ( line instanceof VCFFormatHeaderLine) {
final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line; final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) { if ( formatline.getID().equals(VCFConstants.STRAND_BIAS_KEY) ) {
hasSBBSannotation = true; logger.warn("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
break;
}
}
}
if (hasSBBSannotation) {
logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail."); "values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
return; return;
} }
}
boolean hasReads = toolkit.getReadsDataSource().getReaderIDs().size() > 0;
if (hasReads) {
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
return;
} }
logger.info("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output."); // Are there reads from a SAM/BAM file?
if (toolkit.getReadsDataSource().getReaderIDs().isEmpty())
logger.warn("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
else
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
} }
@Override @Override
@ -115,35 +115,38 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final Map<String,AlignmentContext> stratifiedContexts, final Map<String,AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// do not process if not a variant
if ( !vc.isVariant() ) if ( !vc.isVariant() )
return null; return null;
// if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
if ( vc.hasGenotypes() ) { if ( vc.hasGenotypes() ) {
boolean hasSB = false;
for (final Genotype g : vc.getGenotypes()) { for (final Genotype g : vc.getGenotypes()) {
if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) { if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) {
hasSB = true;
break;
}
}
if (hasSB)
return calculateAnnotationFromGTfield(vc.getGenotypes()); return calculateAnnotationFromGTfield(vc.getGenotypes());
} }
}
}
//stratifiedContexts can come come from VariantAnnotator, but will be size 0 if no reads were provided // if a the variant is a snp and has stratified contexts, calculate the annotation from the stratified contexts
if (vc.isSNP() && stratifiedContexts != null && stratifiedContexts.size() > 0) { //stratifiedContexts can come come from VariantAnnotator, but will be empty if no reads were provided
if (vc.isSNP() && stratifiedContexts != null && !stratifiedContexts.isEmpty()) {
return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc); return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
} }
// calculate the annotation from the stratified per read likelihood map
// stratifiedPerReadAllelelikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine // stratifiedPerReadAllelelikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine
else if (stratifiedPerReadAlleleLikelihoodMap != null) { else if (stratifiedPerReadAlleleLikelihoodMap != null) {
return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc); return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
} }
else else {
// for non-snp variants, we need per-read likelihoods. // for non-snp variants, we need per-read likelihoods.
// for snps, we can get same result from simple pileup // for snps, we can get same result from simple pileup
// for indels that do not have a computed strand bias (SB) or strand bias by sample (SBBS)
return null; return null;
} }
}
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes); protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);
@ -161,7 +164,13 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation * @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
*/ */
protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) { protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); } if( genotypes == null ) {
if ( !getTableFromSamplesWarningLogged ) {
logger.warn("Genotypes cannot be null.");
getTableFromSamplesWarningLogged = true;
}
return null;
}
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
boolean foundData = false; boolean foundData = false;
@ -195,10 +204,10 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final List<Allele> allAlts, final List<Allele> allAlts,
final int minQScoreToConsider, final int minQScoreToConsider,
final int minCount ) { final int minCount ) {
int[][] table = new int[2][2]; int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) { for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4]; final int[] myTable = new int[ARRAY_SIZE];
for (final PileupElement p : sample.getValue().getBasePileup()) { for (final PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
@ -229,16 +238,28 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap, public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc, final VariantContext vc,
final int minCount) { final int minCount) {
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); } if( stratifiedPerReadAlleleLikelihoodMap == null ) {
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); } if ( !stratifiedPerReadAlleleLikelihoodMapWarningLogged ) {
logger.warn("stratifiedPerReadAlleleLikelihoodMap cannot be null");
stratifiedPerReadAlleleLikelihoodMapWarningLogged = true;
}
return null;
}
if( vc == null ) {
if ( !inputVariantContextWarningLogged ) {
logger.warn("input vc cannot be null");
inputVariantContextWarningLogged = true;
}
return null;
}
final Allele ref = vc.getReference(); final Allele ref = vc.getReference();
final Allele alt = vc.getAltAlleleWithHighestAlleleCount(); final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
final List<Allele> allAlts = vc.getAlternateAlleles(); final List<Allele> allAlts = vc.getAlternateAlleles();
final int[][] table = new int[2][2]; final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) { for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
final int[] myTable = new int[4]; final int[] myTable = new int[ARRAY_SIZE];
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) { for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey(); final GATKSAMRecord read = el.getKey();
@ -286,7 +307,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final boolean matchesAnyAlt = allAlts.contains(allele); final boolean matchesAnyAlt = allAlts.contains(allele);
if ( matchesRef || matchesAnyAlt ) { if ( matchesRef || matchesAnyAlt ) {
final int offset = matchesRef ? 0 : 2; final int offset = matchesRef ? 0 : ARRAY_DIM;
if ( read.isStrandless() ) { if ( read.isStrandless() ) {
// a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1 // a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1
@ -320,9 +341,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the array used by the per-sample Strand Bias annotation * @return the array used by the per-sample Strand Bias annotation
*/ */
private static int[] encodeSBBS( final String string ) { private static int[] encodeSBBS( final String string ) {
final int[] array = new int[4]; final int[] array = new int[ARRAY_SIZE];
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false); final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
for( int index = 0; index < 4; index++ ) { for( int index = 0; index < ARRAY_SIZE; index++ ) {
array[index] = Integer.parseInt(tokenizer.nextToken()); array[index] = Integer.parseInt(tokenizer.nextToken());
} }
return array; return array;
@ -334,8 +355,14 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
* @return the table used by the StrandOddsRatio annotation * @return the table used by the StrandOddsRatio annotation
*/ */
private static int[][] decodeSBBS( final int[] array ) { private static int[][] decodeSBBS( final int[] array ) {
if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); } if(array.length != ARRAY_SIZE) {
final int[][] table = new int[2][2]; if ( !decodeSBBSWarningLogged ) {
logger.warn("Expecting a length = " + ARRAY_SIZE + " strand bias array.");
decodeSBBSWarningLogged = true;
}
return null;
}
final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
table[0][0] = array[0]; table[0][0] = array[0];
table[0][1] = array[1]; table[0][1] = array[1];
table[1][0] = array[2]; table[1][0] = array[2];

View File

@ -162,9 +162,9 @@ public class StrandOddsRatio extends StrandBiasTest implements StandardAnnotatio
* @return the augmented table * @return the augmented table
*/ */
private static double[][] augmentContingencyTable(final int[][] table) { private static double[][] augmentContingencyTable(final int[][] table) {
double[][] augmentedTable = new double[2][2]; double[][] augmentedTable = new double[ARRAY_DIM][ARRAY_DIM];
for ( int i = 0; i < 2; i++ ) { for ( int i = 0; i < ARRAY_DIM; i++ ) {
for ( int j = 0; j < 2; j++ ) for ( int j = 0; j < ARRAY_DIM; j++ )
augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT; augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT;
} }

View File

@ -51,12 +51,15 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.collections.Pair;
@ -65,10 +68,7 @@ import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import java.util.Arrays; import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
* Tandem repeat unit composition and counts per allele * Tandem repeat unit composition and counts per allele
@ -84,15 +84,29 @@ import java.util.Map;
* *
*/ */
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation { public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
private final static Logger logger = Logger.getLogger(TandemRepeatAnnotator.class);
private static final String STR_PRESENT = "STR"; private static final String STR_PRESENT = "STR";
private static final String REPEAT_UNIT_KEY = "RU"; private static final String REPEAT_UNIT_KEY = "RU";
private static final String REPEATS_PER_ALLELE_KEY = "RPA"; private static final String REPEATS_PER_ALLELE_KEY = "RPA";
private boolean walkerIdentityCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) throws UserException {
// Can not be called from HaplotypeCaller
if ( walker instanceof HaplotypeCaller ) {
if ( !walkerIdentityCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, can not be called from HaplotypeCaller");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
if ( !vc.isIndel()) if ( !vc.isIndel())
return null; return null;
@ -117,10 +131,12 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"), new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") }; new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
@Override
public List<String> getKeyNames() { public List<String> getKeyNames() {
return Arrays.asList(keyNames); return Arrays.asList(keyNames);
} }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
} }

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@ -87,21 +87,41 @@ import java.util.*;
*/ */
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation { public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation {
private final static Logger logger = Logger.getLogger(TransmissionDisequilibriumTest.class);
private Set<Sample> trios = null; private Set<Sample> trios = null;
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
private boolean walkerIdentityCheckWarningLogged = false;
private boolean pedigreeCheckWarningLogged = false;
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap){ final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap){
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
walkerIdentityCheckWarningLogged = true;
}
return null;
}
// Get trios from the input pedigree file.
if ( trios == null ) { if ( trios == null ) {
if ( walker instanceof VariantAnnotator ) {
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents(); trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
} else { if (trios == null || trios.isEmpty()) {
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in."); if ( !pedigreeCheckWarningLogged ) {
logger.warn("Transmission disequilibrium test annotation requires a valid ped file be passed in.");
pedigreeCheckWarningLogged = true;
}
return null;
} }
} }
@ -125,8 +145,10 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
} }
// return the descriptions used for the VCF INFO meta field // return the descriptions used for the VCF INFO meta field
@Override
public List<String> getKeyNames() { return Arrays.asList("TDT"); } public List<String> getKeyNames() { return Arrays.asList("TDT"); }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT

View File

@ -55,7 +55,6 @@ import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.PositionalBufferedStream; import htsjdk.tribble.readers.PositionalBufferedStream;
import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.Genotype;
import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFCodec;
import org.testng.Assert; import org.testng.Assert;
@ -95,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() { public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4f7ebd519451a776c1aa61493ff33943")); Arrays.asList("92eb47332dd9d7ee7fbe3120dc39c594"));
executeTest("test file has annotations, asking for annotations, #1", spec); executeTest("test file has annotations, asking for annotations, #1", spec);
} }
@ -103,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() { public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("8cd16a59e4697beb1c6d75d0b82c8cf5")); Arrays.asList("c367bf7cebd7b26305f8d4736788aec8"));
executeTest("test file has annotations, asking for annotations, #2", spec); executeTest("test file has annotations, asking for annotations, #2", spec);
} }
@ -129,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() { public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a4df0258a61170c74c85b3cd516c8153")); Arrays.asList("098dcad8d90d90391755a0191c9db59c"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec); executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
} }
@ -137,7 +136,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() { public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("1554af900d1caee1d85824ee85e54398")); Arrays.asList("f3bbfbc179d2e1bae49890f1e9dfde34"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec); executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
} }
@ -145,7 +144,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() { public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("11935c8d5cc5a170d06f0b624b31079f")); Arrays.asList("7267450fc4d002f75a24ca17278e0950"));
executeTest("test exclude annotations", spec); executeTest("test exclude annotations", spec);
} }
@ -153,7 +152,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() { public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("06b4127795a67bd26156cc1651f3a98b")); Arrays.asList("18592c72d83ee84e1326acb999518c38"));
executeTest("test overwriting header", spec); executeTest("test overwriting header", spec);
} }
@ -271,7 +270,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
"--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " + "--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " +
"-L 1:10001292-10012424", "-L 1:10001292-10012424",
1, 1,
UserException.class Arrays.asList("87cbf53c65ef4498b721f901f87f0161")
); );
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec); executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
} }
@ -309,10 +308,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test @Test
public void testStrandBiasBySample() throws IOException { public void testStrandBiasBySample() throws IOException {
// pipeline 1: create variant via HalotypeCaller with no default annotations
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800"; final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("")); final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
final File outputVCF = executeTest("testStrandBiasBySample", spec).getFirst().get(0); final File outputVCF = executeTest("testStrandBiasBySample", spec).getFirst().get(0);
// pipeline 2: create variant via HalotypeCaller; include StrandBiasBySample, exclude FisherStrand annotation
// re-Annotate the variant with VariantAnnotator using FisherStrand annotation
final String baseNoFS = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA FisherStrand -A StrandBiasBySample"; final String baseNoFS = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA FisherStrand -A StrandBiasBySample";
final WalkerTestSpec specNoFS = new WalkerTestSpec(baseNoFS, 1, Arrays.asList("")); final WalkerTestSpec specNoFS = new WalkerTestSpec(baseNoFS, 1, Arrays.asList(""));
specNoFS.disableShadowBCF(); specNoFS.disableShadowBCF();

View File

@ -310,7 +310,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " + baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +
"-A SnpEff", "-A SnpEff",
1, 1,
UserException.class); Arrays.asList("037ce3364668ee6527fba80c4f4bff95"));
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec); executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
} }

View File

@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleComplex1() { public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "846af1842d2d42e43ce87583d227667d"); HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "dc7906ed73dc071162c98e4bbe77df44");
} }
private void HCTestSymbolicVariants(String bam, String args, String md5) { private void HCTestSymbolicVariants(String bam, String args, String md5) {

View File

@ -367,7 +367,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testLackSensitivityDueToBadHaplotypeSelectionFix() { public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16", final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list"); b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fa83f82ba63729edd8696e82bfeea49"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
} }

View File

@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.GATKVCFUtils;
import htsjdk.variant.vcf.*; import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@ -58,6 +57,8 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private static Logger logger = Logger.getLogger(SnpEff.class); private static Logger logger = Logger.getLogger(SnpEff.class);
private boolean canAnnotate = true;
// We refuse to parse SnpEff output files generated by unsupported versions, or // We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header: // lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" }; public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" };
@ -209,10 +210,15 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
} }
} }
@Override
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile): // without providing a SnpEff rod via --snpEffFile):
validateRodBinding(walker.getSnpEffRodBinding()); if ( !isValidRodBinding(walker.getSnpEffRodBinding()) ) {
canAnnotate = false;
return;
}
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding(); RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of // Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
@ -221,21 +227,40 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
checkSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine); if ( !isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine) ) {
canAnnotate = false;
return;
}
// If everything looks ok, add the SnpEff version number and command-line header lines to the // If everything looks ok, add the SnpEff version number and command-line header lines to the
// header of the VCF output file, changing the key names so that our output file won't be // header of the VCF output file, changing the key names so that our output file won't be
// mistaken in the future for a SnpEff output file: // mistaken in the future for a SnpEff output file:
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
// Can only be called from VariantAnnotator
if ( !(walker instanceof VariantAnnotator) ) {
if ( walker != null )
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
else
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
canAnnotate = false;
return;
}
} }
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// Can not annotate if failed initialization conditions
if ( !canAnnotate )
return null;
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding(); RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Get only SnpEff records that start at this locus, not merely span it: // Get only SnpEff records that start at this locus, not merely span it:
@ -251,7 +276,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects: // Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord); List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
if ( effects.size() == 0 ) { if ( effects.isEmpty() ) {
return null; return null;
} }
@ -260,35 +285,42 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return mostSignificantEffect.getAnnotations(); return mostSignificantEffect.getAnnotations();
} }
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) { private boolean isValidRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) { if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " + logger.warn("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
"as a rodbinding on the command line via the --snpEffFile option, but " + "as a rodbinding on the command line via the --snpEffFile option, but " +
"no SnpEff rodbinding was found."); "no SnpEff rodbinding was found.");
return false;
} }
return true;
} }
private void checkSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ) { private boolean isValidSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ){
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) { if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " + logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"and so could not verify that the file was generated by a supported version of SnpEff (%s)", "and so could not verify that the file was generated by a supported version of SnpEff (%s)",
SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString())); SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString()));
return false;
} }
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) { if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " + logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"which should be added by all supported versions of SnpEff (%s)", "which should be added by all supported versions of SnpEff (%s)",
SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, supportedSnpEffVersionsString())); SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, supportedSnpEffVersionsString()));
return false;
} }
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0]; String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
if ( ! isSupportedSnpEffVersion(snpEffVersionString, snpEffCommandLine.getValue()) ) { if ( ! isSupportedSnpEffVersion(snpEffVersionString, snpEffCommandLine.getValue()) ) {
throw new UserException(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " + logger.warn(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
"is not currently supported by the GATK, and was not run in GATK " + "is not currently supported by the GATK, and was not run in GATK " +
"compatibility mode. Supported versions are: %s", "compatibility mode. Supported versions are: %s",
snpEffVersionString, supportedSnpEffVersionsString())); snpEffVersionString, supportedSnpEffVersionsString()));
return false;
} }
return true;
} }
private boolean isSupportedSnpEffVersion( final String versionString, final String commandLine ) { private boolean isSupportedSnpEffVersion( final String versionString, final String commandLine ) {
@ -377,6 +409,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return mostSignificantEffect; return mostSignificantEffect;
} }
@Override
public List<String> getKeyNames() { public List<String> getKeyNames() {
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(), return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(), InfoFieldKey.IMPACT_KEY.getKeyName(),
@ -390,6 +423,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
); );
} }
@Override
public List<VCFInfoHeaderLine> getDescriptions() { public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList( return Arrays.asList(
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"), new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),