Make BQSR calculateIsIndel robust to indel CIGARs are start/end of read

-- The previous implementation attempted to be robust to this, but not all cases were handled properly.  Added a helper function updateInde() that bounds up the update to be in the range of the indel array, and cleaned up logic of how the method works.  The previous behavior was inconsistent across read fwd/rev stand, so that the indel cigars at the end of read were put at the start of reads if the reads were in the forward strand but not if they were in the reverse strand.  Everything is now consistent, as can be seen in the symmetry of the unit tests:

        tests.add(new Object[]{"1D3M",   false, EventType.BASE_DELETION, new int[]{0,0,0}});
        tests.add(new Object[]{"1M1D2M", false, EventType.BASE_DELETION, new int[]{1,0,0}});
        tests.add(new Object[]{"2M1D1M", false, EventType.BASE_DELETION, new int[]{0,1,0}});
        tests.add(new Object[]{"3M1D",   false, EventType.BASE_DELETION, new int[]{0,0,1}});

        tests.add(new Object[]{"1D3M",   true, EventType.BASE_DELETION, new int[]{1,0,0}});
        tests.add(new Object[]{"1M1D2M", true, EventType.BASE_DELETION, new int[]{0,1,0}});
        tests.add(new Object[]{"2M1D1M", true, EventType.BASE_DELETION, new int[]{0,0,1}});
        tests.add(new Object[]{"3M1D",   true, EventType.BASE_DELETION, new int[]{0,0,0}});

        tests.add(new Object[]{"4M1I",   false, EventType.BASE_INSERTION, new int[]{0,0,0,1,0}});
        tests.add(new Object[]{"3M1I1M", false, EventType.BASE_INSERTION, new int[]{0,0,1,0,0}});
        tests.add(new Object[]{"2M1I2M", false, EventType.BASE_INSERTION, new int[]{0,1,0,0,0}});
        tests.add(new Object[]{"1M1I3M", false, EventType.BASE_INSERTION, new int[]{1,0,0,0,0}});
        tests.add(new Object[]{"1I4M",   false, EventType.BASE_INSERTION, new int[]{0,0,0,0,0}});

        tests.add(new Object[]{"4M1I",   true, EventType.BASE_INSERTION, new int[]{0,0,0,0,0}});
        tests.add(new Object[]{"3M1I1M", true, EventType.BASE_INSERTION, new int[]{0,0,0,0,1}});
        tests.add(new Object[]{"2M1I2M", true, EventType.BASE_INSERTION, new int[]{0,0,0,1,0}});
        tests.add(new Object[]{"1M1I3M", true, EventType.BASE_INSERTION, new int[]{0,0,1,0,0}});
        tests.add(new Object[]{"1I4M",   true, EventType.BASE_INSERTION, new int[]{0,1,0,0,0}});

-- delivers #50445353
This commit is contained in:
Mark DePristo 2013-05-31 13:16:00 -04:00
parent a96f48bc39
commit 64b4d80729
2 changed files with 13 additions and 11 deletions

View File

@ -366,9 +366,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
}
protected static int[] calculateIsIndel( final GATKSAMRecord read, final EventType mode ) {
final byte[] readBases = read.getReadBases();
final int[] indel = new int[readBases.length];
Arrays.fill(indel, 0);
final int[] indel = new int[read.getReadBases().length];
int readPos = 0;
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
final int elementLength = ce.getLength();
@ -383,21 +381,19 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
}
case D:
{
final int index = ( read.getReadNegativeStrandFlag() ? readPos : ( readPos > 0 ? readPos - 1 : readPos ) );
indel[index] = ( mode.equals(EventType.BASE_DELETION) ? 1 : 0 );
final int index = ( read.getReadNegativeStrandFlag() ? readPos : readPos - 1 );
updateIndel(indel, index, mode, EventType.BASE_DELETION);
break;
}
case I:
{
final boolean forwardStrandRead = !read.getReadNegativeStrandFlag();
if( forwardStrandRead ) {
indel[(readPos > 0 ? readPos - 1 : readPos)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 );
}
for (int iii = 0; iii < elementLength; iii++) {
readPos++;
updateIndel(indel, readPos - 1, mode, EventType.BASE_INSERTION);
}
readPos += elementLength;
if( !forwardStrandRead ) {
indel[(readPos < indel.length ? readPos : readPos - 1)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 );
updateIndel(indel, readPos, mode, EventType.BASE_INSERTION);
}
break;
}
@ -412,6 +408,12 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
return indel;
}
private static void updateIndel(final int[] indel, final int index, final EventType mode, final EventType requiredMode) {
if ( mode == requiredMode && index >= 0 && index < indel.length )
// protect ourselves from events at the start or end of the read (1D3M or 3M1D)
indel[index] = 1;
}
protected static double[] calculateFractionalErrorArray( final int[] errorArray, final byte[] baqArray ) {
if(errorArray.length != baqArray.length ) {
throw new ReviewedStingException("Array length mismatch detected. Malformed read?");

View File

@ -111,7 +111,7 @@ public class BQSRIntegrationTest extends WalkerTest {
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "85a120b7d86b61597b86b9e93decbdfc")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "5248dc49aec0323c74b496bb4928c73c")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "cb52f267e0010f849f50b0bf1de474a1")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "1425a5063ee757dbfc013df24e65a67a")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "fb372d0a8fc41b01ced1adab31546850")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "c1c3cda8caceed619d3d439c3990cd26")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c9953f020a65c1603a6d71aeeb1b95f3")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5bfff0c699345cca12a9b33acf95588f")},