From 0cf5d30dacbff0a4e69baa2192b68a68ddca44fe Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 13 Mar 2013 15:51:01 -0400 Subject: [PATCH] Bug fix in assembly for edge case in which the extendPartialHaplotype function was filling in deletions in the middle of haplotypes. --- .../haplotypecaller/DeBruijnAssembler.java | 24 ++++++++++--------- .../walkers/haplotypecaller/KBestPaths.java | 20 +--------------- ...lexAndSymbolicVariantsIntegrationTest.java | 4 ++-- .../HaplotypeCallerIntegrationTest.java | 12 +++++----- 4 files changed, 22 insertions(+), 38 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 1e447240b..566605a8c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -438,7 +438,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { byte[] newHaplotypeBases = haplotype.getBases(); int refPos = activeRegionStart; int hapPos = 0; - for( CigarElement ce : cigar.getCigarElements() ) { + for( int iii = 0; iii < cigar.getCigarElements().size(); iii++ ) { + final CigarElement ce = cigar.getCigarElement(iii); switch (ce.getOperator()) { case M: refPos += ce.getLength(); @@ -450,16 +451,17 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { newCigar.add(ce); break; case D: - refPos += ce.getLength(); - newCigar.add(ce); - break; - case X: - newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos), - ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()), - Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length))); - refPos += ce.getLength(); - hapPos += ce.getLength(); - newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M)); + if( iii == 0 || iii == cigar.getCigarElements().size() - 1 ) { + newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos), + ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()), + Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length))); + hapPos += ce.getLength(); + refPos += ce.getLength(); + newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M)); + } else { + refPos += ce.getLength(); + newCigar.add(ce); + } break; default: throw new IllegalStateException("Unsupported cigar operator detected: " + ce.getOperator()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java index 90c2e6a2a..e97fdb3cb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java @@ -310,31 +310,13 @@ public class KBestPaths { if( swCigar.numCigarElements() > 6 ) { // this bubble is too divergent from the reference returnCigar.add(new CigarElement(1, CigarOperator.N)); } else { - int skipElement = -1; - if( fromVertex == null ) { - for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) { - final CigarElement ce = swCigar.getCigarElement(iii); - if( ce.getOperator().equals(CigarOperator.D) ) { - skipElement = iii; - break; - } - } - } else if (toVertex == null ) { - for( int iii = swCigar.numCigarElements() - 1; iii >= 0; iii-- ) { - final CigarElement ce = swCigar.getCigarElement(iii); - if( ce.getOperator().equals(CigarOperator.D) ) { - skipElement = iii; - break; - } - } - } for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) { // now we need to remove the padding from the cigar string int length = swCigar.getCigarElement(iii).getLength(); if( iii == 0 ) { length -= padding.length; } if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; } if( length > 0 ) { - returnCigar.add(new CigarElement(length, (skipElement == iii ? CigarOperator.X : swCigar.getCigarElement(iii).getOperator()))); + returnCigar.add(new CigarElement(length, swCigar.getCigarElement(iii).getOperator())); } } if( (refBytes == null && returnCigar.getReferenceLength() != 0) || ( refBytes != null && returnCigar.getReferenceLength() != refBytes.length ) ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index fcf9168b3..72e06ddc6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -87,12 +87,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "417174e043dbb8b86cc3871da9b50536"); + "fd3412030628fccf77effdb1ec03dce7"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "9563e3c1eee2ef46afc7822af0bb58a8"); + "633e8930a263e34def5e097889dd9805"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index a9898b567..fb267297f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -69,12 +69,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "4a2880f0753e6e813b9e0c35209b3708"); + HCTest(CEUTRIO_BAM, "", "694d6ea7f0f305854d4108379d68de75"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10"); + HCTest(NA12878_BAM, "", "995501d8af646af3b6eaa4109e2fb4a0"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "fa1b92373c89d2238542a319ad25c257"); + "627124af27dc4556d83df1a04e4b9f97"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "205fc8647b908c0dab7b5c6d6b78c0c2"); } @Test @@ -118,7 +118,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("9296f1af6cf1f1cc4b79494eb366e976")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f1250a8ecd404443dcca20741a74ec4f")); executeTest("HCTestStructuralIndels: ", spec); } @@ -148,7 +148,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("addceb63f5bfa9f11e15335d5bf641e9")); + Arrays.asList("d3eb900eecdafafda3170f67adff42ae")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } }