diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java index f257a5960..57b5cc1a4 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java @@ -47,6 +47,8 @@ package org.broadinstitute.gatk.tools.walkers.annotator; import cern.jet.math.Arithmetic; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypesContext; import org.apache.log4j.Logger; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; @@ -87,42 +89,34 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation, private static final int MIN_QUAL_FOR_FILTERED_TEST = 17; private static final int MIN_COUNT = 2; - public Map annotate(final RefMetaDataTracker tracker, - final AnnotatorCompatible walker, - final ReferenceContext ref, - final Map stratifiedContexts, - final VariantContext vc, - final Map stratifiedPerReadAlleleLikelihoodMap) { - if ( !vc.isVariant() ) - return null; - - if ( vc.hasGenotypes() ) { - final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT ); - if ( tableFromPerSampleAnnotations != null ) { - return pValueForBestTable(tableFromPerSampleAnnotations, null); - } - } - - if (vc.isSNP() && stratifiedContexts != null) { - final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT); - final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT); - printTable("unfiltered", tableNoFiltering); - printTable("filtered", tableFiltering); - return pValueForBestTable(tableFiltering, tableNoFiltering); - } - else if (stratifiedPerReadAlleleLikelihoodMap != null) { - // either SNP with no alignment context, or indels: per-read likelihood map needed - final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT); - //logger.info("VC " + vc); - //printTable(table, 0.0); - return pValueForBestTable(table, null); - } - else - // for non-snp variants, we need per-read likelihoods. - // for snps, we can get same result from simple pileup - return null; + @Override + protected Map calculateAnnotationFromGTfield(final GenotypesContext genotypes){ + final int[][] tableFromPerSampleAnnotations = getTableFromSamples( genotypes, MIN_COUNT ); + return ( tableFromPerSampleAnnotations != null )? pValueForBestTable(tableFromPerSampleAnnotations, null) : null; } + @Override + protected Map calculateAnnotationFromStratifiedContexts(final Map stratifiedContexts, + final VariantContext vc){ + final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT); + final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT); + printTable("unfiltered", tableNoFiltering); + printTable("filtered", tableFiltering); + return pValueForBestTable(tableFiltering, tableNoFiltering); + } + + @Override + protected Map calculateAnnotationFromLikelihoodMap(final Map stratifiedPerReadAlleleLikelihoodMap, + final VariantContext vc){ + // either SNP with no alignment context, or indels: per-read likelihood map needed + final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT); + //logger.info("VC " + vc); + //printTable(table, 0.0); + return pValueForBestTable(table, null); + } + + + /** * Create an annotation for the highest (i.e., least significant) p-value of table1 and table2 * diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java index 435de0f04..f2d72f8b0 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java @@ -48,7 +48,15 @@ package org.broadinstitute.gatk.tools.walkers.annotator; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFormatHeaderLine; +import htsjdk.variant.vcf.VCFHeaderLine; +import htsjdk.variant.vcf.VCFInfoHeaderLine; +import org.apache.log4j.Logger; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; +import org.broadinstitute.gatk.engine.contexts.ReferenceContext; +import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; +import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.GenotypesContext; @@ -64,6 +72,83 @@ import java.util.*; * Class of tests to detect strand bias. */ public abstract class StrandBiasTest extends InfoFieldAnnotation { + private final static Logger logger = Logger.getLogger(StrandBiasTest.class); + + @Override + public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set headerLines) { + boolean hasSBBSannotation = false; + for ( final VCFHeaderLine line : headerLines) { + if ( line instanceof VCFFormatHeaderLine) { + final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line; + if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) { + hasSBBSannotation = true; + break; + } + } + } + + if (hasSBBSannotation) { + logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " + + "values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail."); + return; + } + + boolean hasReads = toolkit.getReadsDataSource().getReaderIDs().size() > 0; + if (hasReads) { + logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values."); + return; + } + + logger.info(new String("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.")); + + } + + @Override + //template method for calculating strand bias annotations using the three different methods + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { + if ( !vc.isVariant() ) + return null; + + if ( vc.hasGenotypes() ) { + boolean hasSB = false; + for (final Genotype g : vc.getGenotypes()) { + if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) { + hasSB = true; + break; + } + } + if (hasSB) + return calculateAnnotationFromGTfield(vc.getGenotypes()); + } + + //stratifiedContexts can come come from VariantAnnotator, but will be size 0 if no reads were provided + if (vc.isSNP() && stratifiedContexts != null && stratifiedContexts.size() > 0) { + return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc); + } + + //stratifiedPerReadAllelelikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine + else if (stratifiedPerReadAlleleLikelihoodMap != null) { + return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc); + } + else + // for non-snp variants, we need per-read likelihoods. + // for snps, we can get same result from simple pileup + return null; + } + + protected abstract Map calculateAnnotationFromGTfield(final GenotypesContext genotypes); + + protected abstract Map calculateAnnotationFromStratifiedContexts(final Map stratifiedContexts, + final VariantContext vc); + + protected abstract Map calculateAnnotationFromLikelihoodMap(final Map stratifiedPerReadAlleleLikelihoodMap, + final VariantContext vc); + /** * Create the contingency table by retrieving the per-sample strand bias annotation and adding them together * @param genotypes the genotypes from which to pull out the per-sample strand bias annotation @@ -108,9 +193,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation { final int minCount ) { int[][] table = new int[2][2]; - for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + for (final Map.Entry sample : stratifiedContexts.entrySet() ) { final int[] myTable = new int[4]; - for (PileupElement p : sample.getValue().getBasePileup()) { + for (final PileupElement p : sample.getValue().getBasePileup()) { if ( ! isUsableBase(p) ) // ignore deletions and bad MQ continue; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java index f2034944d..d23ac281a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java @@ -46,6 +46,8 @@ package org.broadinstitute.gatk.tools.walkers.annotator; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypesContext; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; @@ -79,38 +81,30 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased private static final String SOR = "SOR"; @Override - public Map annotate(final RefMetaDataTracker tracker, - final AnnotatorCompatible walker, - final ReferenceContext ref, - final Map stratifiedContexts, - final VariantContext vc, - final Map stratifiedPerReadAlleleLikelihoodMap) { - if ( !vc.isVariant() ) - return null; - - if ( vc.hasGenotypes() ) { - final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT ); - if ( tableFromPerSampleAnnotations != null ) { - final double ratio = calculateSOR(tableFromPerSampleAnnotations); - return annotationForOneTable(ratio); - } - } - - if (vc.isSNP() && stratifiedContexts != null) { - final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT); - final double ratio = calculateSOR(tableNoFiltering); + protected Map calculateAnnotationFromGTfield(GenotypesContext genotypes){ + final int[][] tableFromPerSampleAnnotations = getTableFromSamples( genotypes, MIN_COUNT ); + if ( tableFromPerSampleAnnotations != null ) { + final double ratio = calculateSOR(tableFromPerSampleAnnotations); return annotationForOneTable(ratio); } - else if (stratifiedPerReadAlleleLikelihoodMap != null) { - // either SNP with no alignment context, or indels: per-read likelihood map needed - final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT); - final double ratio = calculateSOR(table); - return annotationForOneTable(ratio); - } - else - // for non-snp variants, we need per-read likelihoods. - // for snps, we can get same result from simple pileup - return null; + return null; + } + + @Override + protected Map calculateAnnotationFromStratifiedContexts(Map stratifiedContexts, + final VariantContext vc){ + final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT); + final double ratio = calculateSOR(tableNoFiltering); + return annotationForOneTable(ratio); + } + + @Override + protected Map calculateAnnotationFromLikelihoodMap(Map stratifiedPerReadAlleleLikelihoodMap, + final VariantContext vc){ + // either SNP with no alignment context, or indels: per-read likelihood map needed + final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT); + final double ratio = calculateSOR(table); + return annotationForOneTable(ratio); } /** diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java index 437aa9372..efd2a0aac 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -155,7 +155,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("1c423b7730b9805e7b885ece924286e0")); + Arrays.asList("e86d04320dc9a597c1f58ad4b03b33c3")); executeTest("not passing it any reads", spec); } @@ -163,7 +163,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("54d7d5bb9404652857adf5e50d995f30")); + Arrays.asList("bcccbe1c8572d98fa73d824d80ea0a05")); executeTest("getting DB tag with dbSNP", spec); } @@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testMultipleIdsWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1, - Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39")); + Arrays.asList("e9d82ed45249833803aad9ebf2a686c2")); executeTest("adding multiple IDs with dbSNP", spec); } @@ -179,7 +179,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("cc7184263975595a6e2473d153227146")); + Arrays.asList("2ff3d4bdaa9e0cc2688b26fed601c184")); executeTest("getting DB tag with HM3", spec); } @@ -187,7 +187,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithTwoComps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf --comp:foo " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("6afbf05090ae139f53467cf6e0e71cf4")); + Arrays.asList("dbffbec98dc60850a6bde11540e8040c")); executeTest("getting DB tag with 2 comps", spec); } @@ -203,7 +203,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887")); + Arrays.asList("fb00c3b512926f8036829e53fd7f369a")); executeTest("using expression", spec); } @@ -211,7 +211,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpressionWithID() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("3de1d1998203518098ffae233f3e2352")); + Arrays.asList("915f72e6cbeb7a77cd2746b4a99a6d64")); executeTest("using expression with ID", spec); }