Merge pull request #708 from broadinstitute/ldg_SBannotationWarnings

Refactor StrandBiasTest (using template method) and add warnings for whe...
This commit is contained in:
Eric Banks 2014-08-20 09:30:06 -04:00
commit 78c2da1fef
4 changed files with 146 additions and 73 deletions

View File

@ -47,6 +47,8 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import cern.jet.math.Arithmetic;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
@ -87,42 +89,34 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
private static final int MIN_COUNT = 2;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( !vc.isVariant() )
return null;
if ( vc.hasGenotypes() ) {
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
if ( tableFromPerSampleAnnotations != null ) {
return pValueForBestTable(tableFromPerSampleAnnotations, null);
}
}
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
printTable("unfiltered", tableNoFiltering);
printTable("filtered", tableFiltering);
return pValueForBestTable(tableFiltering, tableNoFiltering);
}
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
// either SNP with no alignment context, or indels: per-read likelihood map needed
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
//logger.info("VC " + vc);
//printTable(table, 0.0);
return pValueForBestTable(table, null);
}
else
// for non-snp variants, we need per-read likelihoods.
// for snps, we can get same result from simple pileup
return null;
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
    // Sum the per-sample SB genotype annotations into a single 2x2 strand contingency table.
    final int[][] combinedTable = getTableFromSamples( genotypes, MIN_COUNT );
    if ( combinedTable == null ) {
        // No sample carried a usable SB annotation.
        return null;
    }
    return pValueForBestTable(combinedTable, null);
}
@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
                                                                        final VariantContext vc){
    // Build two contingency tables from the pileup: one unfiltered and one with the
    // base-quality filter applied, then report the more conservative p-value.
    final int[][] unfiltered = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
    final int[][] filtered = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
    printTable("unfiltered", unfiltered);
    printTable("filtered", filtered);
    return pValueForBestTable(filtered, unfiltered);
}
@Override
protected Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
                                                                   final VariantContext vc){
    // Used when no pileup is available (SNP with no alignment context) or for indels,
    // where per-read likelihoods are required to assign reads to alleles.
    final int[][] contingencyTable = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
    return pValueForBestTable(contingencyTable, null);
}
/**
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
*

View File

@ -48,7 +48,15 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
@ -64,6 +72,83 @@ import java.util.*;
* Class of tests to detect strand bias.
*/
public abstract class StrandBiasTest extends InfoFieldAnnotation {
private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
@Override
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
    // Determine up front which data source will feed the strand-bias calculation
    // (per-sample SB genotype annotations, or raw reads) and tell the user which
    // one will be attempted — or warn that neither is available.
    boolean hasSBBSannotation = false;
    for ( final VCFHeaderLine line : headerLines ) {
        if ( line instanceof VCFFormatHeaderLine ) {
            final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
            if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
                hasSBBSannotation = true;
                break;
            }
        }
    }

    if ( hasSBBSannotation ) {
        logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
                "values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
        return;
    }

    // Fall back to read data (SAM/BAM) for building the contingency tables.
    // Fix: use isEmpty() rather than size() > 0, and make the local final.
    final boolean hasReads = !toolkit.getReadsDataSource().getReaderIDs().isEmpty();
    if ( hasReads ) {
        logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
        return;
    }

    // Fix: dropped the redundant `new String(...)` wrapper around the literal.
    logger.info("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
}
@Override
// Template method: choose one of the three concrete calculation strategies
// depending on which data are available for this variant context.
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
                                    final AnnotatorCompatible walker,
                                    final ReferenceContext ref,
                                    final Map<String,AlignmentContext> stratifiedContexts,
                                    final VariantContext vc,
                                    final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
    if ( !vc.isVariant() )
        return null;

    // Strategy 1: per-sample SB genotype annotations, if any genotype carries one.
    if ( vc.hasGenotypes() ) {
        for ( final Genotype genotype : vc.getGenotypes() ) {
            if ( genotype.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
                return calculateAnnotationFromGTfield(vc.getGenotypes());
            }
        }
    }

    // Strategy 2: pileup-based table for SNPs. stratifiedContexts can come from
    // VariantAnnotator, but will be empty if no reads were provided.
    if ( vc.isSNP() && stratifiedContexts != null && !stratifiedContexts.isEmpty() ) {
        return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
    }

    // Strategy 3: per-read likelihoods, e.g. from the HaplotypeCaller's call to
    // the VariantAnnotatorEngine.
    if ( stratifiedPerReadAlleleLikelihoodMap != null ) {
        return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
    }

    // Nothing usable: non-SNP variants need per-read likelihoods, while for SNPs
    // the same result can be obtained from a simple pileup — neither is present.
    return null;
}
/**
 * Calculate the strand bias annotation from per-sample SB genotype fields.
 *
 * @param genotypes the genotypes from which to pull the per-sample SB annotations
 * @return the annotation key/value map, or null if it cannot be computed
 */
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);

/**
 * Calculate the strand bias annotation from stratified (per-sample) pileup contexts.
 *
 * @param stratifiedContexts per-sample alignment contexts (pileups)
 * @param vc                 the variant being annotated
 * @return the annotation key/value map, or null if it cannot be computed
 */
protected abstract Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc);

/**
 * Calculate the strand bias annotation from per-read allele likelihoods.
 *
 * @param stratifiedPerReadAlleleLikelihoodMap per-sample, per-read allele likelihoods
 * @param vc                                   the variant being annotated
 * @return the annotation key/value map, or null if it cannot be computed
 */
protected abstract Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc);
/**
* Create the contingency table by retrieving the per-sample strand bias annotation and adding them together
* @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
@ -108,9 +193,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final int minCount ) {
int[][] table = new int[2][2];
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4];
for (PileupElement p : sample.getValue().getBasePileup()) {
for (final PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
continue;

View File

@ -46,6 +46,8 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
@ -79,38 +81,30 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
private static final String SOR = "SOR";
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String,AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( !vc.isVariant() )
return null;
if ( vc.hasGenotypes() ) {
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
if ( tableFromPerSampleAnnotations != null ) {
final double ratio = calculateSOR(tableFromPerSampleAnnotations);
return annotationForOneTable(ratio);
}
}
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final double ratio = calculateSOR(tableNoFiltering);
/**
 * Calculate the SOR annotation from the per-sample SB genotype annotations.
 * Fix: this span interleaved leftover lines of the removed annotate() method
 * with the new override; reconstructed as the single clean method, with the
 * parameter made final and @Override added for consistency with the siblings.
 *
 * @param genotypes the genotypes from which to pull the per-sample SB annotations
 * @return the SOR annotation map, or null if no sample had a usable SB annotation
 */
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
    // Sum per-sample SB tables into one 2x2 strand contingency table.
    final int[][] tableFromPerSampleAnnotations = getTableFromSamples( genotypes, MIN_COUNT );
    if ( tableFromPerSampleAnnotations != null ) {
        final double ratio = calculateSOR(tableFromPerSampleAnnotations);
        return annotationForOneTable(ratio);
    }
    return null;
}
/**
 * Calculate the SOR annotation from stratified (per-sample) pileup contexts.
 * Fix: parameter declared final for consistency with the FisherStrand override
 * and the rest of the class.
 *
 * @param stratifiedContexts per-sample alignment contexts (pileups)
 * @param vc                 the variant being annotated
 * @return the SOR annotation map
 */
@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
                                                                        final VariantContext vc){
    // SOR uses only the unfiltered table (no base-quality filtering pass).
    final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
    final double ratio = calculateSOR(tableNoFiltering);
    return annotationForOneTable(ratio);
}
/**
 * Calculate the SOR annotation from per-read allele likelihoods — used for SNPs
 * with no alignment context, and for indels, where per-read likelihoods are
 * needed to assign reads to alleles.
 * Fix: parameter declared final for consistency with the FisherStrand override
 * and the rest of the class.
 *
 * @param stratifiedPerReadAlleleLikelihoodMap per-sample, per-read allele likelihoods
 * @param vc                                   the variant being annotated
 * @return the SOR annotation map
 */
@Override
protected Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
                                                                   final VariantContext vc){
    final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
    final double ratio = calculateSOR(table);
    return annotationForOneTable(ratio);
}
/**

View File

@ -155,7 +155,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
Arrays.asList("e86d04320dc9a597c1f58ad4b03b33c3"));
executeTest("not passing it any reads", spec);
}
@ -163,7 +163,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
Arrays.asList("bcccbe1c8572d98fa73d824d80ea0a05"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
Arrays.asList("e9d82ed45249833803aad9ebf2a686c2"));
executeTest("adding multiple IDs with dbSNP", spec);
}
@ -179,7 +179,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("cc7184263975595a6e2473d153227146"));
Arrays.asList("2ff3d4bdaa9e0cc2688b26fed601c184"));
executeTest("getting DB tag with HM3", spec);
}
@ -187,7 +187,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithTwoComps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf --comp:foo " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("6afbf05090ae139f53467cf6e0e71cf4"));
Arrays.asList("dbffbec98dc60850a6bde11540e8040c"));
executeTest("getting DB tag with 2 comps", spec);
}
@ -203,7 +203,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
Arrays.asList("fb00c3b512926f8036829e53fd7f369a"));
executeTest("using expression", spec);
}
@ -211,7 +211,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("3de1d1998203518098ffae233f3e2352"));
Arrays.asList("915f72e6cbeb7a77cd2746b4a99a6d64"));
executeTest("using expression with ID", spec);
}