Allow SOR to be calculated from HC

Refactor StrandBiasTest classes
2014-08-01 13:55:06 -04:00 · 2014-08-01 13:55:06 -04:00 · 5533199402
parent e69b0d6316
commit 5533199402
4 changed files with 144 additions and 113 deletions
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java
@ -104,15 +104,15 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
        }

        if (vc.isSNP() && stratifiedContexts != null) {
-            final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1);
-            final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST);
+            final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
+            final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
            printTable("unfiltered", tableNoFiltering);
            printTable("filtered", tableFiltering);
            return pValueForBestTable(tableFiltering, tableNoFiltering);
        }
        else if (stratifiedPerReadAlleleLikelihoodMap != null) {
            // either SNP with no alignment context, or indels: per-read likelihood map needed
-            final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc);
+            final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
            //logger.info("VC " + vc);
            //printTable(table, 0.0);
            return pValueForBestTable(table, null);
@ -325,114 +325,6 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
        return sum;
    }

-    /**
-     Allocate and fill a 2x2 strand contingency table.  In the end, it'll look something like this:
-     *             fw      rc
-     *   allele1   #       #
-     *   allele2   #       #
-     * @return a 2x2 contingency table
-     */
-    public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap, final VariantContext vc) {
-        if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
-        if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }

-        final Allele ref = vc.getReference();
-        final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
-        final int[][] table = new int[2][2];

-        for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
-            final int[] myTable = new int[4];
-            for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
-                final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
-                final GATKSAMRecord read = el.getKey();
-                updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt);
-            }
-            if ( passesMinimumThreshold(myTable, MIN_COUNT) )
-                copyToMainTable(myTable, table);
-        }
-
-        return table;
-    }
-
-    /**
-     * Helper method to copy the per-sample table to the main table
-     *
-     * @param perSampleTable   per-sample table (single dimension)
-     * @param mainTable        main table (two dimensions)
-     */
-    private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) {
-        mainTable[0][0] += perSampleTable[0];
-        mainTable[0][1] += perSampleTable[1];
-        mainTable[1][0] += perSampleTable[2];
-        mainTable[1][1] += perSampleTable[3];
-    }
-
-    /**
-     Allocate and fill a 2x2 strand contingency table.  In the end, it'll look something like this:
-     *             fw      rc
-     *   allele1   #       #
-     *   allele2   #       #
-     * @return a 2x2 contingency table
-     */
-    private static int[][] getSNPContingencyTable(final Map<String, AlignmentContext> stratifiedContexts,
-                                                  final Allele ref,
-                                                  final Allele alt,
-                                                  final int minQScoreToConsider ) {
-        int[][] table = new int[2][2];
-
-        for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
-            final int[] myTable = new int[4];
-            for (PileupElement p : sample.getValue().getBasePileup()) {
-
-                if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
-                    continue;
-
-                if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
-                    continue;
-
-                updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt);
-            }
-
-            if ( passesMinimumThreshold( myTable, MIN_COUNT ) )
-                copyToMainTable(myTable, table);
-        }
-
-        return table;
-    }
-
-    /**
-     * Can the base in this pileup element be used in comparative tests?
-     *
-     * @param p the pileup element to consider
-     *
-     * @return true if this base is part of a meaningful read for comparison, false otherwise
-     */
-    private static boolean isUsableBase(final PileupElement p) {
-        return !( p.isDeletion() ||
-                p.getMappingQual() == 0 ||
-                p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
-                ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE);
-    }
-
-    private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt) {
-
-        final boolean matchesRef = allele.equals(ref, true);
-        final boolean matchesAlt = allele.equals(alt, true);
-
-        if ( matchesRef || matchesAlt ) {
-            final int offset = matchesRef ? 0 : 2;
-
-            if ( read.isStrandless() ) {
-                // a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1
-                // (the 1 is to ensure that a strandless read always counts as an observation on both strands, even
-                // if the read is only seen once, because it's a merged read or other)
-                table[offset]++;
-                table[offset + 1]++;
-            } else {
-                // a normal read with an actual strand
-                final boolean isFW = !read.getReadNegativeStrandFlag();
-                table[offset + (isFW ? 0 : 1)]++;
-            }
-        }
-    }
 }
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasBySample.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasBySample.java
@ -82,7 +82,7 @@ public class StrandBiasBySample extends GenotypeAnnotation {
        if ( ! isAppropriateInput(alleleLikelihoodMap, g) )
            return;

-        final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc);
+        final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc, 0);

        gb.attribute(STRAND_BIAS_BY_SAMPLE_KEY_NAME, FisherStrand.getContingencyArray(table));
    }
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java
@ -46,9 +46,17 @@

 package org.broadinstitute.gatk.tools.walkers.annotator;

+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
 import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
 import htsjdk.variant.variantcontext.Genotype;
 import htsjdk.variant.variantcontext.GenotypesContext;
+import org.broadinstitute.gatk.utils.QualityUtils;
+import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
+import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.gatk.utils.pileup.PileupElement;
+import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;

 import java.util.*;

@ -85,6 +93,122 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {

        return ( foundData ? decodeSBBS(sbArray) : null );
    }
+
+    /**
+     Allocate and fill a 2x2 strand contingency table.  In the end, it'll look something like this:
+     *             fw      rc
+     *   allele1   #       #
+     *   allele2   #       #
+     * @return a 2x2 contingency table
+     */
+    protected static int[][] getSNPContingencyTable(final Map<String, AlignmentContext> stratifiedContexts,
+                                                  final Allele ref,
+                                                  final Allele alt,
+                                                  final int minQScoreToConsider,
+                                                  final int minCount ) {
+        int[][] table = new int[2][2];
+
+        for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
+            final int[] myTable = new int[4];
+            for (PileupElement p : sample.getValue().getBasePileup()) {
+
+                if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
+                    continue;
+
+                if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
+                    continue;
+
+                updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt);
+            }
+
+            if ( passesMinimumThreshold( myTable, minCount ) )
+                copyToMainTable(myTable, table);
+        }
+
+        return table;
+    }
+
+    /**
+     Allocate and fill a 2x2 strand contingency table.  In the end, it'll look something like this:
+     *             fw      rc
+     *   allele1   #       #
+     *   allele2   #       #
+     * @return a 2x2 contingency table
+     */
+    public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
+                                               final VariantContext vc,
+                                               final int minCount) {
+        if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
+        if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
+
+        final Allele ref = vc.getReference();
+        final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
+        final int[][] table = new int[2][2];
+
+        for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
+            final int[] myTable = new int[4];
+            for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
+                final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
+                final GATKSAMRecord read = el.getKey();
+                updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt);
+            }
+            if ( passesMinimumThreshold(myTable, minCount) )
+                copyToMainTable(myTable, table);
+        }
+
+        return table;
+    }
+
+    /**
+     * Helper method to copy the per-sample table to the main table
+     *
+     * @param perSampleTable   per-sample table (single dimension)
+     * @param mainTable        main table (two dimensions)
+     */
+    private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) {
+        mainTable[0][0] += perSampleTable[0];
+        mainTable[0][1] += perSampleTable[1];
+        mainTable[1][0] += perSampleTable[2];
+        mainTable[1][1] += perSampleTable[3];
+    }
+
+
+    /**
+     * Can the base in this pileup element be used in comparative tests?
+     *
+     * @param p the pileup element to consider
+     *
+     * @return true if this base is part of a meaningful read for comparison, false otherwise
+     */
+    private static boolean isUsableBase(final PileupElement p) {
+        return !( p.isDeletion() ||
+                p.getMappingQual() == 0 ||
+                p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
+                ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE);
+    }
+
+    private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt) {
+
+        final boolean matchesRef = allele.equals(ref, true);
+        final boolean matchesAlt = allele.equals(alt, true);
+
+        if ( matchesRef || matchesAlt ) {
+            final int offset = matchesRef ? 0 : 2;
+
+            if ( read.isStrandless() ) {
+                // a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1
+                // (the 1 is to ensure that a strandless read always counts as an observation on both strands, even
+                // if the read is only seen once, because it's a merged read or other)
+                table[offset]++;
+                table[offset + 1]++;
+            } else {
+                // a normal read with an actual strand
+                final boolean isFW = !read.getReadNegativeStrandFlag();
+                table[offset + (isFW ? 0 : 1)]++;
+            }
+        }
+    }
+
    /**
     * Does this strand data array pass the minimum threshold for inclusion?
     *
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java
@ -92,7 +92,22 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
                return annotationForOneTable(ratio);
            }
        }
-        return null;
+
+        if (vc.isSNP() && stratifiedContexts != null) {
+            final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
+            final double ratio = symmetricOddsRatio(tableNoFiltering);
+            return annotationForOneTable(ratio);
+        }
+        else if (stratifiedPerReadAlleleLikelihoodMap != null) {
+            // either SNP with no alignment context, or indels: per-read likelihood map needed
+            final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
+            final double ratio = symmetricOddsRatio(table);
+            return annotationForOneTable(ratio);
+        }
+        else
+            // for non-snp variants, we  need per-read likelihoods.
+            // for snps, we can get same result from simple pileup
+            return null;
    }

    /**