From 64b4d8072923612b38662aad984a836dc8093fcb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 31 May 2013 13:16:00 -0400 Subject: [PATCH] Make BQSR calculateIsIndel robust to indel CIGARs are start/end of read -- The previous implementation attempted to be robust to this, but not all cases were handled properly. Added a helper function updateInde() that bounds up the update to be in the range of the indel array, and cleaned up logic of how the method works. The previous behavior was inconsistent across read fwd/rev stand, so that the indel cigars at the end of read were put at the start of reads if the reads were in the forward strand but not if they were in the reverse strand. Everything is now consistent, as can be seen in the symmetry of the unit tests: tests.add(new Object[]{"1D3M", false, EventType.BASE_DELETION, new int[]{0,0,0}}); tests.add(new Object[]{"1M1D2M", false, EventType.BASE_DELETION, new int[]{1,0,0}}); tests.add(new Object[]{"2M1D1M", false, EventType.BASE_DELETION, new int[]{0,1,0}}); tests.add(new Object[]{"3M1D", false, EventType.BASE_DELETION, new int[]{0,0,1}}); tests.add(new Object[]{"1D3M", true, EventType.BASE_DELETION, new int[]{1,0,0}}); tests.add(new Object[]{"1M1D2M", true, EventType.BASE_DELETION, new int[]{0,1,0}}); tests.add(new Object[]{"2M1D1M", true, EventType.BASE_DELETION, new int[]{0,0,1}}); tests.add(new Object[]{"3M1D", true, EventType.BASE_DELETION, new int[]{0,0,0}}); tests.add(new Object[]{"4M1I", false, EventType.BASE_INSERTION, new int[]{0,0,0,1,0}}); tests.add(new Object[]{"3M1I1M", false, EventType.BASE_INSERTION, new int[]{0,0,1,0,0}}); tests.add(new Object[]{"2M1I2M", false, EventType.BASE_INSERTION, new int[]{0,1,0,0,0}}); tests.add(new Object[]{"1M1I3M", false, EventType.BASE_INSERTION, new int[]{1,0,0,0,0}}); tests.add(new Object[]{"1I4M", false, EventType.BASE_INSERTION, new int[]{0,0,0,0,0}}); tests.add(new Object[]{"4M1I", true, EventType.BASE_INSERTION, new int[]{0,0,0,0,0}}); tests.add(new Object[]{"3M1I1M", true, EventType.BASE_INSERTION, new int[]{0,0,0,0,1}}); tests.add(new Object[]{"2M1I2M", true, EventType.BASE_INSERTION, new int[]{0,0,0,1,0}}); tests.add(new Object[]{"1M1I3M", true, EventType.BASE_INSERTION, new int[]{0,0,1,0,0}}); tests.add(new Object[]{"1I4M", true, EventType.BASE_INSERTION, new int[]{0,1,0,0,0}}); -- delivers #50445353 --- .../gatk/walkers/bqsr/BaseRecalibrator.java | 22 ++++++++++--------- .../walkers/bqsr/BQSRIntegrationTest.java | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 278317da3..c60eceaa4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -366,9 +366,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche } protected static int[] calculateIsIndel( final GATKSAMRecord read, final EventType mode ) { - final byte[] readBases = read.getReadBases(); - final int[] indel = new int[readBases.length]; - Arrays.fill(indel, 0); + final int[] indel = new int[read.getReadBases().length]; int readPos = 0; for ( final CigarElement ce : read.getCigar().getCigarElements() ) { final int elementLength = ce.getLength(); @@ -383,21 +381,19 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche } case D: { - final int index = ( read.getReadNegativeStrandFlag() ? readPos : ( readPos > 0 ? readPos - 1 : readPos ) ); - indel[index] = ( mode.equals(EventType.BASE_DELETION) ? 1 : 0 ); + final int index = ( read.getReadNegativeStrandFlag() ? readPos : readPos - 1 ); + updateIndel(indel, index, mode, EventType.BASE_DELETION); break; } case I: { final boolean forwardStrandRead = !read.getReadNegativeStrandFlag(); if( forwardStrandRead ) { - indel[(readPos > 0 ? readPos - 1 : readPos)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 ); - } - for (int iii = 0; iii < elementLength; iii++) { - readPos++; + updateIndel(indel, readPos - 1, mode, EventType.BASE_INSERTION); } + readPos += elementLength; if( !forwardStrandRead ) { - indel[(readPos < indel.length ? readPos : readPos - 1)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 ); + updateIndel(indel, readPos, mode, EventType.BASE_INSERTION); } break; } @@ -412,6 +408,12 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche return indel; } + private static void updateIndel(final int[] indel, final int index, final EventType mode, final EventType requiredMode) { + if ( mode == requiredMode && index >= 0 && index < indel.length ) + // protect ourselves from events at the start or end of the read (1D3M or 3M1D) + indel[index] = 1; + } + protected static double[] calculateFractionalErrorArray( final int[] errorArray, final byte[] baqArray ) { if(errorArray.length != baqArray.length ) { throw new ReviewedStingException("Array length mismatch detected. Malformed read?"); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 907046704..71c29fe0b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -111,7 +111,7 @@ public class BQSRIntegrationTest extends WalkerTest { {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "85a120b7d86b61597b86b9e93decbdfc")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "5248dc49aec0323c74b496bb4928c73c")}, {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "cb52f267e0010f849f50b0bf1de474a1")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "1425a5063ee757dbfc013df24e65a67a")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "fb372d0a8fc41b01ced1adab31546850")}, {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "c1c3cda8caceed619d3d439c3990cd26")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c9953f020a65c1603a6d71aeeb1b95f3")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5bfff0c699345cca12a9b33acf95588f")},