Fixing BQSR/BAQ bug:

If a read had an existing BAQ tag, was clipped by our engine, and couldn't have the BAQ recalculated (for whatever reason), then we would
fail in the BQSR because we would default to using the old tag (which no longer matched the length of the read bases).
The right thing to do here is to remove the old BAQ tag when RECALCULATE and ADD_TAG are the BAQ modes used but BAQ cannot be recalculated.
Added a unit test to ensure that the tags are removed in such a case.
This commit is contained in:
Eric Banks 2013-01-31 11:03:17 -05:00
parent 495bca3d1a
commit 9c0207f8ef
2 changed files with 23 additions and 1 deletions

View File

@ -673,7 +673,9 @@ public class BAQ {
} else if ( excludeReadFromBAQ(read) ) {
; // just fall through
} else {
if ( calculationType == CalculationMode.RECALCULATE || ! hasBAQTag(read) ) {
final boolean readHasBAQTag = hasBAQTag(read);
if ( calculationType == CalculationMode.RECALCULATE || ! readHasBAQTag ) {
if ( DEBUG ) System.out.printf(" Calculating BAQ on the fly%n");
BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader);
if ( hmmResult != null ) {
@ -683,6 +685,9 @@ public class BAQ {
case DONT_MODIFY: BAQQuals = hmmResult.bq; break;
default: throw new ReviewedStingException("BUG: unexpected qmode " + qmode);
}
} else if ( readHasBAQTag ) {
// remove the BAQ tag if it's there because we cannot trust it
read.setAttribute(BAQ_TAG, null);
}
} else if ( qmode == QualityMode.OVERWRITE_QUALS ) { // only makes sense if we are overwriting quals
if ( DEBUG ) System.out.printf(" Taking BAQ from tag%n");

View File

@ -203,6 +203,23 @@ public class BAQUnitTest extends BaseTest {
Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range");
}
@Test(enabled = true)
public void testBAQOverwritesExistingTagWithNull() {
// create a read with a single base off the end of the contig, which cannot be BAQed
final SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, fasta.getSequenceDictionary().getSequence("chr1").getSequenceLength() + 1, 1);
read.setReadBases(new byte[] {(byte) 'A'});
read.setBaseQualities(new byte[] {(byte) 20});
read.setCigarString("1M");
read.setAttribute("BQ", "A");
// try to BAQ and tell it to RECALCULATE AND ADD_TAG
BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false);
baq.baqRead(read, fasta, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
// did we remove the existing tag?
Assert.assertTrue(read.getAttribute("BQ") == null);
}
public void testBAQ(BAQTest test, boolean lookupWithFasta) {
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters