Correct wrong left-alignment of reads in HC bamout
Story: ----- https://www.pivotaltracker.com/story/show/80684230 Changes: ------- - Corrected the bug: AlignmentUtils#createReadAlignedToRef was not realigning against the reference but the best haplotype for the read. Test: ---- - Added integration test in HaplotypeCallerIntegrationTest to check that the bug has been fixed. - Fixed md5s modified by this change; these are cause due to small changes in the state of the random-number generator and read vs variant site overlapping.
This commit is contained in:
parent
31cb47b9e6
commit
c5977e5c8f
|
|
@ -1046,7 +1046,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,samplesList,reads);
|
||||
|
||||
// Realign reads to their best haplotype.
|
||||
final Map<GATKSAMRecord,GATKSAMRecord> readRealignments = realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getPaddedReferenceLoc());
|
||||
final Map<GATKSAMRecord,GATKSAMRecord> readRealignments = realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getReferenceHaplotype(), assemblyResult.getPaddedReferenceLoc());
|
||||
readLikelihoods.changeReads(readRealignments);
|
||||
|
||||
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
|
||||
|
|
@ -1111,7 +1111,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
* </p>
|
||||
* @return never {@code null}
|
||||
*/
|
||||
private Map<GATKSAMRecord,GATKSAMRecord> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final GenomeLoc paddedReferenceLoc) {
|
||||
private Map<GATKSAMRecord,GATKSAMRecord> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final GenomeLoc paddedReferenceLoc) {
|
||||
|
||||
final Collection<ReadLikelihoods<Haplotype>.BestAllele> bestAlleles = originalReadLikelihoods.bestAlleles();
|
||||
final Map<GATKSAMRecord,GATKSAMRecord> result = new HashMap<>(bestAlleles.size());
|
||||
|
|
@ -1120,7 +1120,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
final GATKSAMRecord originalRead = bestAllele.read;
|
||||
final Haplotype bestHaplotype = bestAllele.allele;
|
||||
final boolean isInformative = bestAllele.isInformative();
|
||||
final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead,bestHaplotype,paddedReferenceLoc.getStart(),isInformative);
|
||||
final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative);
|
||||
result.put(originalRead,realignedRead);
|
||||
}
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "438933e7d6345b80ff09d9be40bdb42e");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "846af1842d2d42e43ce87583d227667d");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -93,13 +93,13 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"a571eb20a5bd17a41b2bd29d00adacc1");
|
||||
"d27dba3772a68f254d00b247f09099ec");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"e208f8ab7c90559757fdabc4fe6710f7");
|
||||
"0eb6ce85c70b152062a5af63b6da3e8f");
|
||||
}
|
||||
|
||||
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
|
||||
|
|
@ -111,7 +111,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
||||
"4c8352d71877a585b8a9b74567be72e6");
|
||||
"523d3fb07b40d8c2637e82ba7a6e2a5b");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "bd0c2401f0c0a1f35ca0563a74672116");
|
||||
HCTest(CEUTRIO_BAM, "", "9863e997c4f4be60e7e76a5573ec270f");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -141,7 +141,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f9388b9a6c8bd76862dc716adfb9fd5d");
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "4d8fdb7ae9dd599f8c9f54ae4484ef38");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
|
||||
" -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
|
||||
"48b11c06729a99d4d54c5aa99663343c");
|
||||
"e9bc5ddd6e7e530268ffeee4c853f8f0");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -267,6 +267,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
|
||||
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_INPUT = privateTestDir + "/bamout-indel-left-align-bugfix-input.bam";
|
||||
|
||||
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam";
|
||||
|
||||
@Test
|
||||
public void testLeftAlignmentBamOutBugFix() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
|
||||
+ " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("c19f0e62f90794661f5927c360d50998"));
|
||||
executeTest("LeftAlignmentBamOutBugFix", spec);
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// test dbSNP annotation
|
||||
|
|
@ -277,7 +290,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("86fb942473b3f8df2f8865209e551200"));
|
||||
Arrays.asList("f0618b66e088b2d6fd831bb357de933c"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -294,7 +307,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("b6dab8a6223afeb9d0fa7c178c84c024"));
|
||||
Arrays.asList("1b15b696671a9da000d5ec0372f939a2"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -177,10 +177,10 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
|
|||
final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone();
|
||||
|
||||
if ( expectedReadCigar == null ) {
|
||||
Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true));
|
||||
Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true));
|
||||
} else {
|
||||
final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar);
|
||||
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true);
|
||||
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true);
|
||||
|
||||
Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName());
|
||||
Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart);
|
||||
|
|
@ -290,7 +290,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
|
|||
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG)
|
||||
public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception {
|
||||
final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination());
|
||||
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, 1, true);
|
||||
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, new Haplotype(reference.getBytes(),true), 1, true);
|
||||
if ( alignedRead != null ) {
|
||||
final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches;
|
||||
Assert.assertTrue(mismatches <= expectedMaxMismatches,
|
||||
|
|
|
|||
|
|
@ -397,13 +397,17 @@ public class PerReadAlleleLikelihoodMap {
|
|||
|
||||
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
|
||||
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
|
||||
for ( final Haplotype haplotype : haplotypes )
|
||||
Haplotype refHaplotype = null;
|
||||
for ( final Haplotype haplotype : haplotypes ) {
|
||||
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
|
||||
if (refHaplotype == null && haplotype.isReference())
|
||||
refHaplotype = haplotype;
|
||||
}
|
||||
|
||||
final Map<GATKSAMRecord, Map<Allele, Double>> newLikelihoodReadMap = new LinkedHashMap<>(likelihoodReadMap.size());
|
||||
for( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
|
||||
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
|
||||
final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
|
||||
final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), refHaplotype, paddedReferenceLoc.getStart(), bestAllele.isInformative());
|
||||
newLikelihoodReadMap.put(alignedToRef, entry.getValue());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -82,10 +82,12 @@ public final class AlignmentUtils {
|
|||
*/
|
||||
public static GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
|
||||
final Haplotype haplotype,
|
||||
final Haplotype refHaplotype,
|
||||
final int referenceStart,
|
||||
final boolean isInformative) {
|
||||
if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
|
||||
if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
|
||||
if ( refHaplotype == null ) throw new IllegalArgumentException("ref haplotype cannot be null");
|
||||
if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
|
||||
if ( referenceStart < 1 ) throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
|
||||
|
||||
|
|
@ -115,7 +117,7 @@ public final class AlignmentUtils {
|
|||
final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
|
||||
final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef);
|
||||
final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw);
|
||||
final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, haplotype.getBases(),
|
||||
final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(),
|
||||
originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
|
||||
|
||||
read.setCigar(readToRefCigar);
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.sam;
|
|||
import htsjdk.samtools.*;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -571,6 +570,8 @@ public class AlignmentUtilsUnitTest {
|
|||
// Test AlignmentUtils.leftAlignIndel() //
|
||||
//////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
@DataProvider(name = "LeftAlignIndelDataProvider")
|
||||
public Object[][] makeLeftAlignIndelDataProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue