From 39c73a6cf5adf5d0643c93af0c60b354a31e73ad Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 18 Jan 2013 03:35:48 -0500 Subject: [PATCH] 1. Ryan and I noticed that the FisherStrand annotation was completely busted for indels with reduced reads; fixed. 2. While making the previous fix and unifying FS for SNPs and indels, I noticed that FS was slightly broken in the general case for indels too; fixed. 3. I also fixed a minor bug in the Allele Biased Downsampling code for reduced reads. --- .../AlleleBiasedDownsamplingUtils.java | 11 ++-- .../gatk/walkers/annotator/FisherStrand.java | 53 +++++++++---------- .../UnifiedGenotyperIntegrationTest.java | 12 ++--- .../HaplotypeCallerIntegrationTest.java | 4 +- 4 files changed, 39 insertions(+), 41 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index a7bb58d0c..ba1da7c87 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -84,12 +84,13 @@ public class AlleleBiasedDownsamplingUtils { // start by stratifying the reads by the alleles they represent at this position for( final PileupElement pe : pileup ) { // we do not want to remove a reduced read - if ( pe.getRead().isReducedRead() ) + if ( pe.getRead().isReducedRead() ) { reducedReadPileups.add(pe); - - final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); - if ( baseIndex != -1 ) - alleleStratifiedElements[baseIndex].add(pe); + } else { + final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); + if ( baseIndex != -1 ) + alleleStratifiedElements[baseIndex].add(pe); + } } // Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later. diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index fbd27dfe3..ff3d7940f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -265,24 +265,16 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) { for (Map.Entry> el : maps.getLikelihoodReadMap().entrySet()) { - final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref,true); - final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt,true); - - if ( !matchesRef && !matchesAlt ) - continue; - - boolean isFW = el.getKey().getReadNegativeStrandFlag(); - - int row = matchesRef ? 0 : 1; - int column = isFW ? 0 : 1; - + final Allele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); final GATKSAMRecord read = el.getKey(); - table[row][column] += (read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1); + final int representativeCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1; + updateTable(table, mostLikelyAllele, read, ref, alt, representativeCount); } } return table; } + /** Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: * fw rc @@ -299,31 +291,36 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat for ( Map.Entry sample : stratifiedContexts.entrySet() ) { for (PileupElement p : sample.getValue().getBasePileup()) { - // ignore reduced reads because they are always on the forward strand! - // TODO -- when het compression is enabled in RR, we somehow need to allow those reads through into the Fisher test - if ( p.getRead().isReducedRead() ) - continue; - if ( ! RankSumTest.isUsableBase(p, false) ) // ignore deletions continue; if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider ) continue; - final Allele base = Allele.create(p.getBase(), false); - final boolean isFW = !p.getRead().getReadNegativeStrandFlag(); - - final boolean matchesRef = ref.equals(base, true); - final boolean matchesAlt = alt.equals(base, true); - if ( matchesRef || matchesAlt ) { - int row = matchesRef ? 0 : 1; - int column = isFW ? 0 : 1; - - table[row][column] += p.getRepresentativeCount(); - } + updateTable(table, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount()); } } return table; } + + private static void updateTable(final int[][] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) { + // ignore reduced reads because they are always on the forward strand! + // TODO -- when het compression is enabled in RR, we somehow need to allow those reads through into the Fisher test + if ( read.isReducedRead() ) + return; + + final boolean matchesRef = allele.equals(ref, true); + final boolean matchesAlt = allele.equals(alt, true); + + if ( matchesRef || matchesAlt ) { + + final boolean isFW = !read.getReadNegativeStrandFlag(); + + int row = matchesRef ? 0 : 1; + int column = isFW ? 0 : 1; + + table[row][column] += representativeCount; + } + } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index a84019988..5b5a75d4e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -363,7 +363,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("39c7a813fd6ee82d3604f2a868b35b2a")); + Arrays.asList("8231ae37b52b927db9fc1e5c221b0ba0")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -391,13 +391,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("3d3c5691973a223209a1341272d881be")); + Arrays.asList("a47810de2f6ef8087f4644064a0814bc")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("23b7a37a64065cee53a80495c8717eea")); + Arrays.asList("53b8d2b0fa63c5d1019855e8e0db28f0")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -497,18 +497,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("092e42a712afb660ec79ff11c55933e2")); + Arrays.asList("02175dc9731aed92837ce0db78489fc0")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f"); + testReducedCalling("SNP", "fe1af8b30b7f1a267f772b9aaf388f24"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "1c9aaf65ffaa12bb766855265a1c3f8e"); + testReducedCalling("INDEL", "a85c110fcac9574a54c7daccb1e2d5ae"); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 6c7afd8bb..27fe31fa7 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -102,7 +102,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "d590c8d6d5e58d685401b65a23846893"); + "1a034b7eb572e1b6f659d6e5d57b3e76"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("6c22e5d57c4f5b631e3345e721aaca1b")); + Arrays.asList("4e8121dd9dc90478f237bd6ae4d19920")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } }