I finally gave up on trying to get the Haplotype/Allele merging to work in the HaplotypeCaller.

I've resigned myself instead to create a mapping from Allele to Haplotype.  It's cheap so not a big deal, but really shouldn't be necessary.
Ryan and I are talking about refactoring for GATK2.5.
This commit is contained in:
Eric Banks 2013-01-18 01:21:08 -05:00
parent 6db3e473af
commit 6a903f2c23
4 changed files with 29 additions and 14 deletions

View File

@ -508,12 +508,17 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
for ( Haplotype haplotype : haplotypes )
writeHaplotype(haplotype, paddedRefLoc, bestHaplotypes.contains(haplotype));
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<Allele, Haplotype>(haplotypes.size());
for ( final Haplotype haplotype : haplotypes )
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
// next, output the interesting reads for each sample aligned against the appropriate haplotype
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
final Allele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
if ( bestAllele != Allele.NO_CALL )
writeReadAgainstHaplotype(entry.getKey(), (Haplotype) bestAllele, paddedRefLoc.getStart());
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele), paddedRefLoc.getStart());
}
}
}

View File

@ -124,9 +124,14 @@ public class LikelihoodCalculationEngine {
}
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads) {
// first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time)
final int numHaplotypes = haplotypes.size();
final Map<Haplotype, Allele> alleleVersions = new HashMap<Haplotype, Allele>(numHaplotypes);
for ( final Haplotype haplotype : haplotypes ) {
alleleVersions.put(haplotype, Allele.create(haplotype.getBases()));
}
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
final int numHaplotypes = haplotypes.size();
for( final GATKSAMRecord read : reads ) {
final byte[] overallGCP = new byte[read.getReadLength()];
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
@ -148,7 +153,7 @@ public class LikelihoodCalculationEngine {
final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
previousHaplotypeSeen = haplotype;
perReadAlleleLikelihoodMap.add(read, haplotype,
perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype),
pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(),
readQuals, readInsQuals, readDelQuals, overallGCP, haplotypeStart, jjj == 0));
}

View File

@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "0e59153c6359d7cb7be44e25ab552790");
HCTest(CEUTRIO_BAM, "", "b8f7b741445ce6b6ea491c794ce75c17");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "d4b377aed2c8be2ebd81ee5e43b73a93");
HCTest(NA12878_BAM, "", "a2c63f6e6e51a01019bdbd23125bdb15");
}
@Test(enabled = false)
@ -102,7 +102,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"1a034b7eb572e1b6f659d6e5d57b3e76");
"d590c8d6d5e58d685401b65a23846893");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -113,7 +113,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "14ed8e5be2d2a0bf478d742b4baa5a46");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "6c0c441b71848c2eea38ab5e2afe1120");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -124,7 +124,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "76fe5e57ed96541bdfee74782331b061");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -135,7 +135,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "25981f7706f61d930556fb128cd1e5c5");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735");
}
// That problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@ -146,7 +146,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("4701887e1927814259560d85098b6440"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8b1b8d1bd7feac1503fc4ffa6236cff7"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@ -175,7 +175,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("18d047bf8116b56e0c6212e0875eceea"));
Arrays.asList("8a400b0c46f41447fcc35a907e34f384"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("0446c11fe2ba68a14f938ebc6e71ded7"));
Arrays.asList("6c22e5d57c4f5b631e3345e721aaca1b"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
}
}

View File

@ -73,9 +73,14 @@ public class Haplotype extends Allele {
@Override
public boolean equals( Object h ) {
return h instanceof Haplotype && super.equals(h);
return h instanceof Haplotype && Arrays.equals(getBases(), ((Haplotype) h).getBases());
}
@Override
public int hashCode() {
return Arrays.hashCode(getBases());
}
public HashMap<Integer, VariantContext> getEventMap() {
return eventMap;
}