1. Ryan and I noticed that the FisherStrand annotation was completely busted for indels with reduced reads; fixed.

2. While making the previous fix and unifying FS for SNPs and indels, I noticed that FS was slightly broken in the general case for indels too; fixed.
3. I also fixed a minor bug in the Allele Biased Downsampling code for reduced reads.
This commit is contained in:
Eric Banks 2013-01-18 03:35:48 -05:00
parent 6a903f2c23
commit 39c73a6cf5
4 changed files with 39 additions and 41 deletions

View File

@ -84,12 +84,13 @@ public class AlleleBiasedDownsamplingUtils {
// start by stratifying the reads by the alleles they represent at this position
for( final PileupElement pe : pileup ) {
// we do not want to remove a reduced read
if ( pe.getRead().isReducedRead() )
if ( pe.getRead().isReducedRead() ) {
reducedReadPileups.add(pe);
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
if ( baseIndex != -1 )
alleleStratifiedElements[baseIndex].add(pe);
} else {
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
if ( baseIndex != -1 )
alleleStratifiedElements[baseIndex].add(pe);
}
}
// Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.

View File

@ -265,24 +265,16 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref,true);
final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt,true);
if ( !matchesRef && !matchesAlt )
continue;
boolean isFW = el.getKey().getReadNegativeStrandFlag();
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
final Allele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey();
table[row][column] += (read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1);
final int representativeCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1;
updateTable(table, mostLikelyAllele, read, ref, alt, representativeCount);
}
}
return table;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
@ -299,31 +291,36 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
for (PileupElement p : sample.getValue().getBasePileup()) {
// ignore reduced reads because they are always on the forward strand!
// TODO -- when het compression is enabled in RR, we somehow need to allow those reads through into the Fisher test
if ( p.getRead().isReducedRead() )
continue;
if ( ! RankSumTest.isUsableBase(p, false) ) // ignore deletions
continue;
if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
continue;
final Allele base = Allele.create(p.getBase(), false);
final boolean isFW = !p.getRead().getReadNegativeStrandFlag();
final boolean matchesRef = ref.equals(base, true);
final boolean matchesAlt = alt.equals(base, true);
if ( matchesRef || matchesAlt ) {
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
table[row][column] += p.getRepresentativeCount();
}
updateTable(table, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount());
}
}
return table;
}
private static void updateTable(final int[][] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) {
// ignore reduced reads because they are always on the forward strand!
// TODO -- when het compression is enabled in RR, we somehow need to allow those reads through into the Fisher test
if ( read.isReducedRead() )
return;
final boolean matchesRef = allele.equals(ref, true);
final boolean matchesAlt = allele.equals(alt, true);
if ( matchesRef || matchesAlt ) {
final boolean isFW = !read.getReadNegativeStrandFlag();
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
table[row][column] += representativeCount;
}
}
}

View File

@ -363,7 +363,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("39c7a813fd6ee82d3604f2a868b35b2a"));
Arrays.asList("8231ae37b52b927db9fc1e5c221b0ba0"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -391,13 +391,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("3d3c5691973a223209a1341272d881be"));
Arrays.asList("a47810de2f6ef8087f4644064a0814bc"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("23b7a37a64065cee53a80495c8717eea"));
Arrays.asList("53b8d2b0fa63c5d1019855e8e0db28f0"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -497,18 +497,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
Arrays.asList("02175dc9731aed92837ce0db78489fc0"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
testReducedCalling("SNP", "fe1af8b30b7f1a267f772b9aaf388f24");
}
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "1c9aaf65ffaa12bb766855265a1c3f8e");
testReducedCalling("INDEL", "a85c110fcac9574a54c7daccb1e2d5ae");
}

View File

@ -102,7 +102,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"d590c8d6d5e58d685401b65a23846893");
"1a034b7eb572e1b6f659d6e5d57b3e76");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("6c22e5d57c4f5b631e3345e721aaca1b"));
Arrays.asList("4e8121dd9dc90478f237bd6ae4d19920"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
}
}