Correct wrong left-alignment of reads in HC bamout

Story:
-----

  https://www.pivotaltracker.com/story/show/80684230

Changes:
-------

  - Corrected the bug: AlignmentUtils#createReadAlignedToRef was
    not realigning against the reference but the best haplotype for
    the read.

Test:
----

  - Added integration test in HaplotypeCallerIntegrationTest to check
    that the bug has been fixed.
  - Fixed md5s modified by this change; these are cause due to small
    changes in the state of the random-number generator and read vs
    variant site overlapping.
This commit is contained in:
Valentin Ruano-Rubio 2014-11-06 18:03:52 -05:00
parent 31cb47b9e6
commit c5977e5c8f
7 changed files with 39 additions and 19 deletions

View File

@ -1046,7 +1046,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,samplesList,reads);
// Realign reads to their best haplotype.
final Map<GATKSAMRecord,GATKSAMRecord> readRealignments = realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getPaddedReferenceLoc());
final Map<GATKSAMRecord,GATKSAMRecord> readRealignments = realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getReferenceHaplotype(), assemblyResult.getPaddedReferenceLoc());
readLikelihoods.changeReads(readRealignments);
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
@ -1111,7 +1111,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
* </p>
* @return never {@code null}
*/
private Map<GATKSAMRecord,GATKSAMRecord> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final GenomeLoc paddedReferenceLoc) {
private Map<GATKSAMRecord,GATKSAMRecord> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final GenomeLoc paddedReferenceLoc) {
final Collection<ReadLikelihoods<Haplotype>.BestAllele> bestAlleles = originalReadLikelihoods.bestAlleles();
final Map<GATKSAMRecord,GATKSAMRecord> result = new HashMap<>(bestAlleles.size());
@ -1120,7 +1120,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final GATKSAMRecord originalRead = bestAllele.read;
final Haplotype bestHaplotype = bestAllele.allele;
final boolean isInformative = bestAllele.isInformative();
final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead,bestHaplotype,paddedReferenceLoc.getStart(),isInformative);
final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative);
result.put(originalRead,realignedRead);
}
return result;

View File

@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "438933e7d6345b80ff09d9be40bdb42e");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "846af1842d2d42e43ce87583d227667d");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -93,13 +93,13 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"a571eb20a5bd17a41b2bd29d00adacc1");
"d27dba3772a68f254d00b247f09099ec");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"e208f8ab7c90559757fdabc4fe6710f7");
"0eb6ce85c70b152062a5af63b6da3e8f");
}
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@ -111,7 +111,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"4c8352d71877a585b8a9b74567be72e6");
"523d3fb07b40d8c2637e82ba7a6e2a5b");
}
}

View File

@ -90,7 +90,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "bd0c2401f0c0a1f35ca0563a74672116");
HCTest(CEUTRIO_BAM, "", "9863e997c4f4be60e7e76a5573ec270f");
}
@Test
@ -141,7 +141,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f9388b9a6c8bd76862dc716adfb9fd5d");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "4d8fdb7ae9dd599f8c9f54ae4484ef38");
}
@Test
@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
" -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
"48b11c06729a99d4d54c5aa99663343c");
"e9bc5ddd6e7e530268ffeee4c853f8f0");
}
@Test
@ -267,6 +267,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
}
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_INPUT = privateTestDir + "/bamout-indel-left-align-bugfix-input.bam";
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam";
@Test
public void testLeftAlignmentBamOutBugFix() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
+ " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("c19f0e62f90794661f5927c360d50998"));
executeTest("LeftAlignmentBamOutBugFix", spec);
}
// --------------------------------------------------------------------------------------------------------------
//
// test dbSNP annotation
@ -277,7 +290,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("86fb942473b3f8df2f8865209e551200"));
Arrays.asList("f0618b66e088b2d6fd831bb357de933c"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -294,7 +307,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("b6dab8a6223afeb9d0fa7c178c84c024"));
Arrays.asList("1b15b696671a9da000d5ec0372f939a2"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}

View File

@ -177,10 +177,10 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone();
if ( expectedReadCigar == null ) {
Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true));
Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true));
} else {
final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar);
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true);
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true);
Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName());
Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart);
@ -290,7 +290,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG)
public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception {
final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination());
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, 1, true);
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, new Haplotype(reference.getBytes(),true), 1, true);
if ( alignedRead != null ) {
final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches;
Assert.assertTrue(mismatches <= expectedMaxMismatches,

View File

@ -397,13 +397,17 @@ public class PerReadAlleleLikelihoodMap {
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : haplotypes )
Haplotype refHaplotype = null;
for ( final Haplotype haplotype : haplotypes ) {
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
if (refHaplotype == null && haplotype.isReference())
refHaplotype = haplotype;
}
final Map<GATKSAMRecord, Map<Allele, Double>> newLikelihoodReadMap = new LinkedHashMap<>(likelihoodReadMap.size());
for( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), refHaplotype, paddedReferenceLoc.getStart(), bestAllele.isInformative());
newLikelihoodReadMap.put(alignedToRef, entry.getValue());
}

View File

@ -82,10 +82,12 @@ public final class AlignmentUtils {
*/
public static GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
final Haplotype haplotype,
final Haplotype refHaplotype,
final int referenceStart,
final boolean isInformative) {
if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
if ( refHaplotype == null ) throw new IllegalArgumentException("ref haplotype cannot be null");
if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
if ( referenceStart < 1 ) throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
@ -115,7 +117,7 @@ public final class AlignmentUtils {
final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef);
final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw);
final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, haplotype.getBases(),
final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(),
originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
read.setCigar(readToRefCigar);

View File

@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.sam;
import htsjdk.samtools.*;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -571,6 +570,8 @@ public class AlignmentUtilsUnitTest {
// Test AlignmentUtils.leftAlignIndel() //
//////////////////////////////////////////
@DataProvider(name = "LeftAlignIndelDataProvider")
public Object[][] makeLeftAlignIndelDataProvider() {
List<Object[]> tests = new ArrayList<Object[]>();