Fixed reported bug in BQSR for RNA seq alignments with Ns.
* ClippingOp updated to incorporate Ns in the hard clips.
* ReadUtils.getReadCoordinateForReferenceCoordinate() updated to account for Ns.
* Added test that covers the BQSR case we saw.
* Created GSA-856 (for Mauricio) to add lots of tests to ReadUtils.
* It will require refactoring code and not in the scope of what I was willing to do to fix this.
This commit is contained in:
parent
be729410b9
commit
573ed07ad0
|
|
@ -581,8 +581,8 @@ public class ClippingOp {
|
|||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
return -clippedLength;
|
||||
|
||||
// Deletions should be added to the total hard clip count
|
||||
else if (cigarElement.getOperator() == CigarOperator.DELETION)
|
||||
// Deletions and Ns should be added to the total hard clip count (because we want to maintain the original alignment start)
|
||||
else if (cigarElement.getOperator() == CigarOperator.DELETION || cigarElement.getOperator() == CigarOperator.SKIPPED_REGION)
|
||||
return cigarElement.getLength();
|
||||
|
||||
// There is no shift if we are not clipping an indel
|
||||
|
|
|
|||
|
|
@ -524,7 +524,7 @@ public class ReadUtils {
|
|||
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
||||
// to add the shift of the current cigar element but go back to it's last element to return the last
|
||||
// base before the deletion (see warning in function contracts)
|
||||
else if (fallsInsideDeletion && !endsWithinCigar)
|
||||
else if (fallsInsideDeletion && !endsWithinCigar && cigarElement.getOperator().consumesReadBases())
|
||||
readBases += shift - 1;
|
||||
|
||||
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
|
||||
|
|
|
|||
|
|
@ -25,13 +25,19 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
|
|
@ -179,4 +185,20 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
final List<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
Assert.assertEquals(ReadUtils.getMaxReadLength(reads), 0, "Empty list should have max length of zero");
|
||||
}
|
||||
|
||||
@Test (enabled = true)
|
||||
public void testReadWithNs() throws FileNotFoundException {
|
||||
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||
final int readLength = 76;
|
||||
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 8975, readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
read.setCigarString("3M414N1D73M");
|
||||
|
||||
final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9392, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
Assert.assertEquals(result, 3);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue