diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 1b9b9e106..84e7e535f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -1046,7 +1046,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,samplesList,reads); // Realign reads to their best haplotype. - final Map readRealignments = realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getPaddedReferenceLoc()); + final Map readRealignments = realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getReferenceHaplotype(), assemblyResult.getPaddedReferenceLoc()); readLikelihoods.changeReads(readRealignments); // Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there @@ -1111,7 +1111,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In *

* @return never {@code null} */ - private Map realignReadsToTheirBestHaplotype(final ReadLikelihoods originalReadLikelihoods, final GenomeLoc paddedReferenceLoc) { + private Map realignReadsToTheirBestHaplotype(final ReadLikelihoods originalReadLikelihoods, final Haplotype refHaplotype, final GenomeLoc paddedReferenceLoc) { final Collection.BestAllele> bestAlleles = originalReadLikelihoods.bestAlleles(); final Map result = new HashMap<>(bestAlleles.size()); @@ -1120,7 +1120,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final GATKSAMRecord originalRead = bestAllele.read; final Haplotype bestHaplotype = bestAllele.allele; final boolean isInformative = bestAllele.isInformative(); - final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead,bestHaplotype,paddedReferenceLoc.getStart(),isInformative); + final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative); result.put(originalRead,realignedRead); } return result; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 9baa01e9b..6dd7d4b1f 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "438933e7d6345b80ff09d9be40bdb42e"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "846af1842d2d42e43ce87583d227667d"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -93,13 +93,13 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "a571eb20a5bd17a41b2bd29d00adacc1"); + "d27dba3772a68f254d00b247f09099ec"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "e208f8ab7c90559757fdabc4fe6710f7"); + "0eb6ce85c70b152062a5af63b6da3e8f"); } private void HCTestComplexConsensusMode(String bam, String args, String md5) { @@ -111,7 +111,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleConsensusModeComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", - "4c8352d71877a585b8a9b74567be72e6"); + "523d3fb07b40d8c2637e82ba7a6e2a5b"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 73e1eda36..860070fb0 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -90,7 +90,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "bd0c2401f0c0a1f35ca0563a74672116"); + HCTest(CEUTRIO_BAM, "", "9863e997c4f4be60e7e76a5573ec270f"); } @Test @@ -141,7 +141,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedMultiSample() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f9388b9a6c8bd76862dc716adfb9fd5d"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "4d8fdb7ae9dd599f8c9f54ae4484ef38"); } @Test @@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" + " -isr INTERSECTION -L " + GGA_INTERVALS_FILE, - "48b11c06729a99d4d54c5aa99663343c"); + "e9bc5ddd6e7e530268ffeee4c853f8f0"); } @Test @@ -267,6 +267,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { } + private static final String LEFT_ALIGNMENT_BAMOUT_TEST_INPUT = privateTestDir + "/bamout-indel-left-align-bugfix-input.bam"; + + private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam"; + + @Test + public void testLeftAlignmentBamOutBugFix() { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT) + + " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef"; + final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("c19f0e62f90794661f5927c360d50998")); + executeTest("LeftAlignmentBamOutBugFix", spec); + } + + // -------------------------------------------------------------------------------------------------------------- // // test dbSNP annotation @@ -277,7 +290,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("86fb942473b3f8df2f8865209e551200")); + Arrays.asList("f0618b66e088b2d6fd831bb357de933c")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -294,7 +307,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("b6dab8a6223afeb9d0fa7c178c84c024")); + Arrays.asList("1b15b696671a9da000d5ec0372f939a2")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java index bfd588cab..b09efb939 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java @@ -177,10 +177,10 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest { final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone(); if ( expectedReadCigar == null ) { - Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true)); + Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true)); } else { final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar); - final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true); + final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true); Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName()); Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart); @@ -290,7 +290,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest { @Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG) public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception { final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination()); - final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, 1, true); + final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, new Haplotype(reference.getBytes(),true), 1, true); if ( alignedRead != null ) { final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches; Assert.assertTrue(mismatches <= expectedMaxMismatches, diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java index 56d12d026..1256df558 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -397,13 +397,17 @@ public class PerReadAlleleLikelihoodMap { // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently final Map alleleToHaplotypeMap = new HashMap<>(haplotypes.size()); - for ( final Haplotype haplotype : haplotypes ) + Haplotype refHaplotype = null; + for ( final Haplotype haplotype : haplotypes ) { alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype); + if (refHaplotype == null && haplotype.isReference()) + refHaplotype = haplotype; + } final Map> newLikelihoodReadMap = new LinkedHashMap<>(likelihoodReadMap.size()); for( final Map.Entry> entry : likelihoodReadMap.entrySet() ) { final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue()); - final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative()); + final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), refHaplotype, paddedReferenceLoc.getStart(), bestAllele.isInformative()); newLikelihoodReadMap.put(alignedToRef, entry.getValue()); } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java index 4c9a4447b..7835b2190 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java @@ -82,10 +82,12 @@ public final class AlignmentUtils { */ public static GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead, final Haplotype haplotype, + final Haplotype refHaplotype, final int referenceStart, final boolean isInformative) { if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null"); if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null"); + if ( refHaplotype == null ) throw new IllegalArgumentException("ref haplotype cannot be null"); if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype); if ( referenceStart < 1 ) throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart); @@ -115,7 +117,7 @@ public final class AlignmentUtils { final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1); final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef); final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw); - final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, haplotype.getBases(), + final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(), originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true); read.setCigar(readToRefCigar); diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java index 32815706e..3334efddc 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java @@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.sam; import htsjdk.samtools.*; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.gatk.utils.Utils; -import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -571,6 +570,8 @@ public class AlignmentUtilsUnitTest { // Test AlignmentUtils.leftAlignIndel() // ////////////////////////////////////////// + + @DataProvider(name = "LeftAlignIndelDataProvider") public Object[][] makeLeftAlignIndelDataProvider() { List tests = new ArrayList();