From b8991f5e9899374549a97a3aa92038ab662ec025 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 15 Mar 2013 12:31:54 -0400 Subject: [PATCH] Fix for edge case bug of trying to create insertions/deletions on the edge of contigs. -- Added integration test using MT that previously failed --- .../haplotypecaller/DeBruijnAssembler.java | 2 +- .../haplotypecaller/GenotypingEngine.java | 54 ++++++++++--------- .../HaplotypeCallerIntegrationTest.java | 8 ++- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 0a552c0a1..1e447240b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -430,7 +430,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { * @param refWithPadding the full reference byte array with padding which encompasses the active region * @return a haplotype fully extended to encompass the active region */ - @Requires({"haplotype != null", "activeRegionStart > 0", "refWithPadding != null", "refWithPadding.length > 0"}) + @Requires({"haplotype != null", "activeRegionStart >= 0", "refWithPadding != null", "refWithPadding.length > 0"}) @Ensures({"result != null", "result.getCigar() != null"}) private Haplotype extendPartialHaplotype( final Haplotype haplotype, final int activeRegionStart, final byte[] refWithPadding ) { final Cigar cigar = haplotype.getCigar(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 34a6ddfa6..1cfc65581 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -710,24 +710,26 @@ public class GenotypingEngine { switch( ce.getOperator() ) { case I: { - final List insertionAlleles = new ArrayList(); - final int insertionStart = refLoc.getStart() + refPos - 1; - final byte refByte = ref[refPos-1]; - if( BaseUtils.isRegularBase(refByte) ) { - insertionAlleles.add( Allele.create(refByte, true) ); - } - if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele - insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); - } else { - byte[] insertionBases = new byte[]{}; - insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base - insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength )); - if( BaseUtils.isAllRegularBases(insertionBases) ) { - insertionAlleles.add( Allele.create(insertionBases, false) ); + if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig + final List insertionAlleles = new ArrayList(); + final int insertionStart = refLoc.getStart() + refPos - 1; + final byte refByte = ref[refPos-1]; + if( BaseUtils.isRegularBase(refByte) ) { + insertionAlleles.add( Allele.create(refByte, true) ); + } + if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele + insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); + } else { + byte[] insertionBases = new byte[]{}; + insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base + insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength )); + if( BaseUtils.isAllRegularBases(insertionBases) ) { + insertionAlleles.add( Allele.create(insertionBases, false) ); + } + } + if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele + vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); } - } - if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele - vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); } alignmentPos += elementLength; break; @@ -739,14 +741,16 @@ public class GenotypingEngine { } case D: { - final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base - final List deletionAlleles = new ArrayList(); - final int deletionStart = refLoc.getStart() + refPos - 1; - final byte refByte = ref[refPos-1]; - if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) { - deletionAlleles.add( Allele.create(deletionBases, true) ); - deletionAlleles.add( Allele.create(refByte, false) ); - vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make()); + if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig + final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base + final List deletionAlleles = new ArrayList(); + final int deletionStart = refLoc.getStart() + refPos - 1; + final byte refByte = ref[refPos-1]; + if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) { + deletionAlleles.add( Allele.create(deletionBases, true) ); + deletionAlleles.add( Allele.create(refByte, false) ); + vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make()); + } } refPos += elementLength; break; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 42eb09e6e..a9898b567 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -58,6 +58,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam"; final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam"; final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam"; + final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam"; final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals"; private void HCTest(String bam, String args, String md5) { @@ -76,7 +77,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10"); } - @Test(enabled = false) + @Test(enabled = false) // can't annotate the rsID's yet public void testHaplotypeCallerSingleSampleWithDbsnp() { HCTest(NA12878_BAM, "-D " + b37dbSNP132, ""); } @@ -98,6 +99,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53"); } + @Test + public void testHaplotypeCallerInsertionOnEdgeOfContig() { + HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "e6f7bbab7cf96cbb25837b7a94bf0f82"); + } + // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper // was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to // map call. So the test is there for consistency but not for correctness. I'm not sure we can trust