1. Ryan and I noticed that the FisherStrand annotation was completely busted for indels with reduced reads; fixed.

2. While making the previous fix and unifying FS for SNPs and indels, I noticed that FS was slightly broken in the general case for indels too; fixed. 3. I also fixed a minor bug in the Allele Biased Downsampling code for reduced reads.
2013-01-18 03:35:48 -05:00 · 2013-01-18 03:35:48 -05:00 · 39c73a6cf5
parent 6a903f2c23
commit 39c73a6cf5
4 changed files with 39 additions and 41 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
@ -84,12 +84,13 @@ public class AlleleBiasedDownsamplingUtils {
        // start by stratifying the reads by the alleles they represent at this position
        for( final PileupElement pe : pileup ) {
            // we do not want to remove a reduced read
-            if ( pe.getRead().isReducedRead() )
+            if ( pe.getRead().isReducedRead() ) {
                reducedReadPileups.add(pe);
-
-            final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
-            if ( baseIndex != -1 )
-                alleleStratifiedElements[baseIndex].add(pe);
+            } else {
+                final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
+                if ( baseIndex != -1 )
+                    alleleStratifiedElements[baseIndex].add(pe);
+            }
        }

        // Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
@ -265,24 +265,16 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat

        for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
            for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
-                final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref,true);
-                final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt,true);
-
-                if ( !matchesRef && !matchesAlt )
-                    continue;
-
-                boolean isFW = el.getKey().getReadNegativeStrandFlag();
-
-                int row = matchesRef ? 0 : 1;
-                int column = isFW ? 0 : 1;
-
+                final Allele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
                final GATKSAMRecord read = el.getKey();
-                table[row][column] += (read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1);
+                final int representativeCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1;
+                updateTable(table, mostLikelyAllele, read, ref, alt, representativeCount);
            }
        }

        return table;
    }
+
    /**
     Allocate and fill a 2x2 strand contingency table.  In the end, it'll look something like this:
     *             fw      rc
@ -299,31 +291,36 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
        for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
            for (PileupElement p : sample.getValue().getBasePileup()) {

-                // ignore reduced reads because they are always on the forward strand!
-                // TODO -- when het compression is enabled in RR, we somehow need to allow those reads through into the Fisher test
-                if ( p.getRead().isReducedRead() )
-                    continue;
-
                if ( ! RankSumTest.isUsableBase(p, false) ) // ignore deletions
                    continue;

                if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
                    continue;

-                final Allele base = Allele.create(p.getBase(), false);
-                final boolean isFW = !p.getRead().getReadNegativeStrandFlag();
-
-                final boolean matchesRef = ref.equals(base, true);
-                final boolean matchesAlt = alt.equals(base, true);
-                if ( matchesRef || matchesAlt ) {
-                    int row = matchesRef ? 0 : 1;
-                    int column = isFW ? 0 : 1;
-
-                    table[row][column] += p.getRepresentativeCount();
-                }
+                updateTable(table, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount());
            }
        }

        return table;
    }
+
+    private static void updateTable(final int[][] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) {
+        // ignore reduced reads because they are always on the forward strand!
+        // TODO -- when het compression is enabled in RR, we somehow need to allow those reads through into the Fisher test
+        if ( read.isReducedRead() )
+            return;
+
+        final boolean matchesRef = allele.equals(ref, true);
+        final boolean matchesAlt = allele.equals(alt, true);
+
+        if ( matchesRef || matchesAlt ) {
+
+            final boolean isFW = !read.getReadNegativeStrandFlag();
+
+            int row = matchesRef ? 0 : 1;
+            int column = isFW ? 0 : 1;
+
+            table[row][column] += representativeCount;
+        }
+    }
 }
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -363,7 +363,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -o %s" +
                         " -L 1:10,000,000-10,500,000",
                 1,
-                 Arrays.asList("39c7a813fd6ee82d3604f2a868b35b2a"));
+                 Arrays.asList("8231ae37b52b927db9fc1e5c221b0ba0"));

         executeTest(String.format("test indel calling, multiple technologies"), spec);
     }
@ -391,13 +391,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testMultiSampleIndels1() {
        WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
-                Arrays.asList("3d3c5691973a223209a1341272d881be"));
+                Arrays.asList("a47810de2f6ef8087f4644064a0814bc"));
        List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();

        WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
                        "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
-                Arrays.asList("23b7a37a64065cee53a80495c8717eea"));
+                Arrays.asList("53b8d2b0fa63c5d1019855e8e0db28f0"));
        executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
    }

@ -497,18 +497,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testReducedBam() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
-                Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
+                Arrays.asList("02175dc9731aed92837ce0db78489fc0"));
        executeTest("test calling on a ReducedRead BAM", spec);
    }

    @Test
    public void testReducedBamSNPs() {
-        testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
+        testReducedCalling("SNP", "fe1af8b30b7f1a267f772b9aaf388f24");
    }

    @Test
    public void testReducedBamINDELs() {
-        testReducedCalling("INDEL", "1c9aaf65ffaa12bb766855265a1c3f8e");
+        testReducedCalling("INDEL", "a85c110fcac9574a54c7daccb1e2d5ae");
    }


--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -102,7 +102,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    @Test
    public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
-                "d590c8d6d5e58d685401b65a23846893");
+                "1a034b7eb572e1b6f659d6e5d57b3e76");
    }

    private void HCTestComplexVariants(String bam, String args, String md5) {
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void testReducedBamWithReadsNotFullySpanningDeletion() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
-                Arrays.asList("6c22e5d57c4f5b631e3345e721aaca1b"));
+                Arrays.asList("4e8121dd9dc90478f237bd6ae4d19920"));
        executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
    }
 }