Bugfix for HaplotypeCaller error: Only one of refStart or refStop must be < 0, not both
-- This occurred because we were reverting reads with soft clips that would produce reads with negative (or 0) alignment starts. From such reads we could end up with adaptor starts that were negative and that would ultimately produce the "Only one of refStart or refStop must be < 0, not both" error in the FragmentUtils merging code (which would revert and adaptor clip reads). -- We now hard clip away bases soft clipped reverted bases that fall before the 1-based contig start in revertSoftClippedBases. -- Replace buggy cigarFromString with proper SAM-JDK call TextCigarCodec.getSingleton().decode(cigarString) -- Added unit tests for reverting soft clipped bases that create a read before the contig -- [delivers #50892431]
This commit is contained in:
parent
a0817b696b
commit
e19c24f3ee
|
|
@ -194,9 +194,17 @@ public class ClippingOp {
|
|||
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||
|
||||
unclipped.setCigar(unclippedCigar);
|
||||
unclipped.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar));
|
||||
final int newStart = read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar);
|
||||
unclipped.setAlignmentStart(newStart);
|
||||
|
||||
return unclipped;
|
||||
if ( newStart <= 0 ) {
|
||||
// if the start of the unclipped read occurs before the contig,
|
||||
// we must hard clip away the bases since we cannot represent reads with
|
||||
// negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned)
|
||||
return hardClip(unclipped, 0, - newStart);
|
||||
} else {
|
||||
return unclipped;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -335,7 +343,24 @@ public class ClippingOp {
|
|||
return newCigar;
|
||||
}
|
||||
|
||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
||||
/**
|
||||
* Hard clip bases from read, from start to stop in base coordinates
|
||||
*
|
||||
* If start == 0, then we will clip from the front of the read, otherwise we clip
|
||||
* from the right. If start == 0 and stop == 10, this would clip out the first
|
||||
* 10 bases of the read.
|
||||
*
|
||||
* Note that this function works with reads with negative alignment starts, in order to
|
||||
* allow us to hardClip reads that have had their soft clips reverted and so might have
|
||||
* negative alignment starts
|
||||
*
|
||||
* Works properly with reduced reads and insertion/deletion base qualities
|
||||
*
|
||||
* @param read a non-null read
|
||||
* @param start a start >= 0 and < read.length
|
||||
* @param stop a stop >= 0 and < read.length.
|
||||
* @return a cloned version of read that has been properly trimmed down
|
||||
*/
|
||||
private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
|
||||
final int firstBaseAfterSoftClips = read.getAlignmentStart() - read.getSoftStart();
|
||||
final int lastBaseBeforeSoftClips = read.getSoftEnd() - read.getSoftStart();
|
||||
|
|
@ -343,7 +368,6 @@ public class ClippingOp {
|
|||
if (start == firstBaseAfterSoftClips && stop == lastBaseBeforeSoftClips) // note that if the read has no soft clips, these constants will be 0 and read length - 1 (beauty of math).
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
|
||||
|
||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||
CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop);
|
||||
|
||||
|
|
@ -357,7 +381,7 @@ public class ClippingOp {
|
|||
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
||||
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
|
||||
|
||||
GATKSAMRecord hardClippedRead;
|
||||
final GATKSAMRecord hardClippedRead;
|
||||
try {
|
||||
hardClippedRead = (GATKSAMRecord) read.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ package org.broadinstitute.sting.utils.clipping;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -38,13 +38,6 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 11/27/11
|
||||
* Time: 6:45 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadClipperTestUtils {
|
||||
//Should contain all the utils needed for tests to mass produce
|
||||
//reads, cigars, and other needed classes
|
||||
|
|
@ -236,78 +229,6 @@ public class ReadClipperTestUtils {
|
|||
}
|
||||
|
||||
public static Cigar cigarFromString(String cigarString) {
|
||||
Cigar cigar = new Cigar();
|
||||
|
||||
boolean isNumber = false;
|
||||
int number = 0;
|
||||
for (int i = 0; i < cigarString.length(); i++) {
|
||||
char x = cigarString.charAt(i);
|
||||
|
||||
if (x >= '0' && x <='9') {
|
||||
if (isNumber) {
|
||||
number *= 10;
|
||||
}
|
||||
else {
|
||||
isNumber = true;
|
||||
}
|
||||
number += x - '0';
|
||||
}
|
||||
|
||||
else {
|
||||
CigarElement e;
|
||||
switch (x) {
|
||||
case 'M':
|
||||
case 'm':
|
||||
e = new CigarElement(number, CigarOperator.M);
|
||||
break;
|
||||
|
||||
case 'I':
|
||||
case 'i':
|
||||
e = new CigarElement(number, CigarOperator.I);
|
||||
break;
|
||||
|
||||
case 'D':
|
||||
case 'd':
|
||||
e = new CigarElement(number, CigarOperator.D);
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
case 's':
|
||||
e = new CigarElement(number, CigarOperator.S);
|
||||
break;
|
||||
|
||||
case 'N':
|
||||
case 'n':
|
||||
e = new CigarElement(number, CigarOperator.N);
|
||||
break;
|
||||
|
||||
case 'H':
|
||||
case 'h':
|
||||
e = new CigarElement(number, CigarOperator.H);
|
||||
break;
|
||||
|
||||
case 'P':
|
||||
case 'p':
|
||||
e = new CigarElement(number, CigarOperator.P);
|
||||
break;
|
||||
|
||||
case '=':
|
||||
e = new CigarElement(number, CigarOperator.EQ);
|
||||
break;
|
||||
|
||||
case 'X':
|
||||
case 'x':
|
||||
e = new CigarElement(number, CigarOperator.X);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new ReviewedStingException("Unrecognized cigar operator: " + x + " (number: " + number + ")");
|
||||
}
|
||||
cigar.add(e);
|
||||
}
|
||||
}
|
||||
return cigar;
|
||||
return TextCigarCodec.getSingleton().decode(cigarString);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ import java.util.List;
|
|||
* Date: 9/28/11
|
||||
*/
|
||||
public class ReadClipperUnitTest extends BaseTest {
|
||||
private final static boolean DEBUG = false;
|
||||
|
||||
List<Cigar> cigarList;
|
||||
int maximumCigarSize = 10; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
|
||||
|
|
@ -55,7 +56,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipBothEndsByReferenceCoordinates() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -71,7 +72,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReadCoordinates() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -101,7 +102,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ClippedReadLengthData", enabled = true)
|
||||
@Test(dataProvider = "ClippedReadLengthData", enabled = !DEBUG)
|
||||
public void testHardClipReadLengthIsRight(final int originalReadLength, final int nToClip) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(originalReadLength + "M");
|
||||
read.getReadLength(); // provoke the caching of the read length
|
||||
|
|
@ -112,7 +113,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
clipped.getReadLength(), clipped.getCigar(), expectedReadLength, nToClip, read.getReadLength(), read.getCigar()));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReferenceCoordinates() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -135,7 +136,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReferenceCoordinatesLeftTail() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -154,7 +155,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReferenceCoordinatesRightTail() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -172,7 +173,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipLowQualEnds() {
|
||||
final byte LOW_QUAL = 2;
|
||||
final byte HIGH_QUAL = 30;
|
||||
|
|
@ -216,7 +217,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipSoftClippedBases() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -251,7 +252,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testRevertSoftClippedBases() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||
|
|
@ -273,7 +274,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testRevertSoftClippedBasesWithThreshold() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||
|
|
@ -292,6 +293,40 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "RevertSoftClipsBeforeContig")
|
||||
public Object[][] makeRevertSoftClipsBeforeContig() {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
for ( int softStart : Arrays.asList(-10, -1, 0) ) {
|
||||
for ( int alignmentStart : Arrays.asList(1, 10) ) {
|
||||
tests.add(new Object[]{softStart, alignmentStart});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "RevertSoftClipsBeforeContig")
|
||||
public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, final int alignmentStart) {
|
||||
final int nMatches = 10;
|
||||
final int nSoft = -1 * (softStart - alignmentStart);
|
||||
final String cigar = nSoft + "S" + nMatches + "M";
|
||||
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
read.setAlignmentStart(alignmentStart);
|
||||
|
||||
Assert.assertEquals(read.getSoftStart(), softStart);
|
||||
Assert.assertEquals(read.getAlignmentStart(), alignmentStart);
|
||||
Assert.assertEquals(read.getCigarString(), cigar);
|
||||
|
||||
final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read);
|
||||
|
||||
final int expectedAlignmentStart = 1;
|
||||
final String expectedCigar = (1 - softStart) + "H" + read.getAlignmentEnd() + "M";
|
||||
Assert.assertEquals(reverted.getSoftStart(), expectedAlignmentStart);
|
||||
Assert.assertEquals(reverted.getAlignmentStart(), expectedAlignmentStart);
|
||||
Assert.assertEquals(reverted.getCigarString(), expectedCigar);
|
||||
}
|
||||
|
||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||
if (!read.isEmpty()) {
|
||||
|
|
@ -375,7 +410,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipReducedRead() {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("10M");
|
||||
final int[] counts = new int[read.getReadLength()];
|
||||
|
|
@ -391,7 +426,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testRevertEntirelySoftclippedReads() {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H");
|
||||
GATKSAMRecord clippedRead = ReadClipper.revertSoftClippedBases(read);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.utils.fragments;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -296,4 +297,51 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
Assert.assertNull(actual);
|
||||
}
|
||||
|
||||
@DataProvider(name = "MergeFragmentsOffContig")
|
||||
public Object[][] makeMergeFragmentsOffContig() throws Exception {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( final int pre1 : Arrays.asList(0, 50)) {
|
||||
for ( final int post1 : Arrays.asList(0, 50)) {
|
||||
for ( final int pre2 : Arrays.asList(0, 50)) {
|
||||
for ( final int post2 : Arrays.asList(0, 50)) {
|
||||
tests.add(new Object[]{pre1, post1, pre2, post2});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "MergeFragmentsOffContig")
|
||||
public void testMergeFragmentsOffContig(final int pre1, final int post1, final int pre2, final int post2) {
|
||||
final int contigSize = 10;
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 0, contigSize);
|
||||
|
||||
final GATKSAMRecord read1 = createReadOffContig(header, false, pre1, post1);
|
||||
final GATKSAMRecord read2 = createReadOffContig(header, true, pre2, post2);
|
||||
|
||||
final GATKSAMRecord merged = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
}
|
||||
|
||||
private GATKSAMRecord createReadOffContig(final SAMFileHeader header, final boolean negStrand, final int pre, final int post) {
|
||||
final int contigLen = header.getSequence(0).getSequenceLength();
|
||||
final int readLen = pre + contigLen + post;
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, readLen);
|
||||
read.setAlignmentStart(1);
|
||||
read.setCigar(TextCigarCodec.getSingleton().decode(pre + "S" + contigLen + "M" + post + "S"));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) 30, readLen));
|
||||
read.setReadBases(Utils.dupBytes((byte)'A', readLen));
|
||||
read.setMappingQuality(60);
|
||||
read.setMateAlignmentStart(1);
|
||||
read.setProperPairFlag(true);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setInferredInsertSize(30);
|
||||
read.setReadNegativeStrandFlag(negStrand);
|
||||
read.setMateNegativeStrandFlag(! negStrand);
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord("foo"));
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue