Merge pull request #257 from broadinstitute/md_unclip_reads_over_contig
Bugfix for HaplotypeCaller error: Only one of refStart or refStop must b...
This commit is contained in:
commit
eaebba5ba1
|
|
@ -194,9 +194,17 @@ public class ClippingOp {
|
|||
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||
|
||||
unclipped.setCigar(unclippedCigar);
|
||||
unclipped.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar));
|
||||
final int newStart = read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar);
|
||||
unclipped.setAlignmentStart(newStart);
|
||||
|
||||
return unclipped;
|
||||
if ( newStart <= 0 ) {
|
||||
// if the start of the unclipped read occurs before the contig,
|
||||
// we must hard clip away the bases since we cannot represent reads with
|
||||
// negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned)
|
||||
return hardClip(unclipped, 0, - newStart);
|
||||
} else {
|
||||
return unclipped;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -335,7 +343,24 @@ public class ClippingOp {
|
|||
return newCigar;
|
||||
}
|
||||
|
||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
||||
/**
|
||||
* Hard clip bases from read, from start to stop in base coordinates
|
||||
*
|
||||
* If start == 0, then we will clip from the front of the read, otherwise we clip
|
||||
* from the right. If start == 0 and stop == 10, this would clip out the first
|
||||
* 10 bases of the read.
|
||||
*
|
||||
* Note that this function works with reads with negative alignment starts, in order to
|
||||
* allow us to hardClip reads that have had their soft clips reverted and so might have
|
||||
* negative alignment starts
|
||||
*
|
||||
* Works properly with reduced reads and insertion/deletion base qualities
|
||||
*
|
||||
* @param read a non-null read
|
||||
* @param start a start >= 0 and < read.length
|
||||
* @param stop a stop >= 0 and < read.length.
|
||||
* @return a cloned version of read that has been properly trimmed down
|
||||
*/
|
||||
private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
|
||||
final int firstBaseAfterSoftClips = read.getAlignmentStart() - read.getSoftStart();
|
||||
final int lastBaseBeforeSoftClips = read.getSoftEnd() - read.getSoftStart();
|
||||
|
|
@ -343,7 +368,6 @@ public class ClippingOp {
|
|||
if (start == firstBaseAfterSoftClips && stop == lastBaseBeforeSoftClips) // note that if the read has no soft clips, these constants will be 0 and read length - 1 (beauty of math).
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
|
||||
|
||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||
CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop);
|
||||
|
||||
|
|
@ -357,7 +381,7 @@ public class ClippingOp {
|
|||
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
||||
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
|
||||
|
||||
GATKSAMRecord hardClippedRead;
|
||||
final GATKSAMRecord hardClippedRead;
|
||||
try {
|
||||
hardClippedRead = (GATKSAMRecord) read.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ package org.broadinstitute.sting.utils.clipping;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -38,13 +38,6 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 11/27/11
|
||||
* Time: 6:45 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadClipperTestUtils {
|
||||
//Should contain all the utils needed for tests to mass produce
|
||||
//reads, cigars, and other needed classes
|
||||
|
|
@ -236,78 +229,6 @@ public class ReadClipperTestUtils {
|
|||
}
|
||||
|
||||
public static Cigar cigarFromString(String cigarString) {
|
||||
Cigar cigar = new Cigar();
|
||||
|
||||
boolean isNumber = false;
|
||||
int number = 0;
|
||||
for (int i = 0; i < cigarString.length(); i++) {
|
||||
char x = cigarString.charAt(i);
|
||||
|
||||
if (x >= '0' && x <='9') {
|
||||
if (isNumber) {
|
||||
number *= 10;
|
||||
}
|
||||
else {
|
||||
isNumber = true;
|
||||
}
|
||||
number += x - '0';
|
||||
}
|
||||
|
||||
else {
|
||||
CigarElement e;
|
||||
switch (x) {
|
||||
case 'M':
|
||||
case 'm':
|
||||
e = new CigarElement(number, CigarOperator.M);
|
||||
break;
|
||||
|
||||
case 'I':
|
||||
case 'i':
|
||||
e = new CigarElement(number, CigarOperator.I);
|
||||
break;
|
||||
|
||||
case 'D':
|
||||
case 'd':
|
||||
e = new CigarElement(number, CigarOperator.D);
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
case 's':
|
||||
e = new CigarElement(number, CigarOperator.S);
|
||||
break;
|
||||
|
||||
case 'N':
|
||||
case 'n':
|
||||
e = new CigarElement(number, CigarOperator.N);
|
||||
break;
|
||||
|
||||
case 'H':
|
||||
case 'h':
|
||||
e = new CigarElement(number, CigarOperator.H);
|
||||
break;
|
||||
|
||||
case 'P':
|
||||
case 'p':
|
||||
e = new CigarElement(number, CigarOperator.P);
|
||||
break;
|
||||
|
||||
case '=':
|
||||
e = new CigarElement(number, CigarOperator.EQ);
|
||||
break;
|
||||
|
||||
case 'X':
|
||||
case 'x':
|
||||
e = new CigarElement(number, CigarOperator.X);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new ReviewedStingException("Unrecognized cigar operator: " + x + " (number: " + number + ")");
|
||||
}
|
||||
cigar.add(e);
|
||||
}
|
||||
}
|
||||
return cigar;
|
||||
return TextCigarCodec.getSingleton().decode(cigarString);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ import java.util.List;
|
|||
* Date: 9/28/11
|
||||
*/
|
||||
public class ReadClipperUnitTest extends BaseTest {
|
||||
private final static boolean DEBUG = false;
|
||||
|
||||
List<Cigar> cigarList;
|
||||
int maximumCigarSize = 10; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
|
||||
|
|
@ -55,7 +56,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipBothEndsByReferenceCoordinates() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -71,7 +72,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReadCoordinates() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -101,7 +102,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ClippedReadLengthData", enabled = true)
|
||||
@Test(dataProvider = "ClippedReadLengthData", enabled = !DEBUG)
|
||||
public void testHardClipReadLengthIsRight(final int originalReadLength, final int nToClip) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(originalReadLength + "M");
|
||||
read.getReadLength(); // provoke the caching of the read length
|
||||
|
|
@ -112,7 +113,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
clipped.getReadLength(), clipped.getCigar(), expectedReadLength, nToClip, read.getReadLength(), read.getCigar()));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReferenceCoordinates() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -135,7 +136,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReferenceCoordinatesLeftTail() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -154,7 +155,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipByReferenceCoordinatesRightTail() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -172,7 +173,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipLowQualEnds() {
|
||||
final byte LOW_QUAL = 2;
|
||||
final byte HIGH_QUAL = 30;
|
||||
|
|
@ -216,7 +217,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipSoftClippedBases() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
|
|
@ -251,7 +252,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testRevertSoftClippedBases() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||
|
|
@ -273,7 +274,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testRevertSoftClippedBasesWithThreshold() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||
|
|
@ -292,6 +293,40 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "RevertSoftClipsBeforeContig")
|
||||
public Object[][] makeRevertSoftClipsBeforeContig() {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
for ( int softStart : Arrays.asList(-10, -1, 0) ) {
|
||||
for ( int alignmentStart : Arrays.asList(1, 10) ) {
|
||||
tests.add(new Object[]{softStart, alignmentStart});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "RevertSoftClipsBeforeContig")
|
||||
public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, final int alignmentStart) {
|
||||
final int nMatches = 10;
|
||||
final int nSoft = -1 * (softStart - alignmentStart);
|
||||
final String cigar = nSoft + "S" + nMatches + "M";
|
||||
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
read.setAlignmentStart(alignmentStart);
|
||||
|
||||
Assert.assertEquals(read.getSoftStart(), softStart);
|
||||
Assert.assertEquals(read.getAlignmentStart(), alignmentStart);
|
||||
Assert.assertEquals(read.getCigarString(), cigar);
|
||||
|
||||
final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read);
|
||||
|
||||
final int expectedAlignmentStart = 1;
|
||||
final String expectedCigar = (1 - softStart) + "H" + read.getAlignmentEnd() + "M";
|
||||
Assert.assertEquals(reverted.getSoftStart(), expectedAlignmentStart);
|
||||
Assert.assertEquals(reverted.getAlignmentStart(), expectedAlignmentStart);
|
||||
Assert.assertEquals(reverted.getCigarString(), expectedCigar);
|
||||
}
|
||||
|
||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||
if (!read.isEmpty()) {
|
||||
|
|
@ -375,7 +410,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClipReducedRead() {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("10M");
|
||||
final int[] counts = new int[read.getReadLength()];
|
||||
|
|
@ -391,7 +426,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testRevertEntirelySoftclippedReads() {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H");
|
||||
GATKSAMRecord clippedRead = ReadClipper.revertSoftClippedBases(read);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.utils.fragments;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -296,4 +297,51 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
Assert.assertNull(actual);
|
||||
}
|
||||
|
||||
@DataProvider(name = "MergeFragmentsOffContig")
|
||||
public Object[][] makeMergeFragmentsOffContig() throws Exception {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( final int pre1 : Arrays.asList(0, 50)) {
|
||||
for ( final int post1 : Arrays.asList(0, 50)) {
|
||||
for ( final int pre2 : Arrays.asList(0, 50)) {
|
||||
for ( final int post2 : Arrays.asList(0, 50)) {
|
||||
tests.add(new Object[]{pre1, post1, pre2, post2});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "MergeFragmentsOffContig")
|
||||
public void testMergeFragmentsOffContig(final int pre1, final int post1, final int pre2, final int post2) {
|
||||
final int contigSize = 10;
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 0, contigSize);
|
||||
|
||||
final GATKSAMRecord read1 = createReadOffContig(header, false, pre1, post1);
|
||||
final GATKSAMRecord read2 = createReadOffContig(header, true, pre2, post2);
|
||||
|
||||
final GATKSAMRecord merged = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
}
|
||||
|
||||
private GATKSAMRecord createReadOffContig(final SAMFileHeader header, final boolean negStrand, final int pre, final int post) {
|
||||
final int contigLen = header.getSequence(0).getSequenceLength();
|
||||
final int readLen = pre + contigLen + post;
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, readLen);
|
||||
read.setAlignmentStart(1);
|
||||
read.setCigar(TextCigarCodec.getSingleton().decode(pre + "S" + contigLen + "M" + post + "S"));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) 30, readLen));
|
||||
read.setReadBases(Utils.dupBytes((byte)'A', readLen));
|
||||
read.setMappingQuality(60);
|
||||
read.setMateAlignmentStart(1);
|
||||
read.setProperPairFlag(true);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setInferredInsertSize(30);
|
||||
read.setReadNegativeStrandFlag(negStrand);
|
||||
read.setMateNegativeStrandFlag(! negStrand);
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord("foo"));
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue