Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2011-12-29 11:37:15 -05:00
commit 1a45ea5a05
5 changed files with 79 additions and 35 deletions

View File

@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
import java.io.PrintStream;
@ -300,7 +299,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES )
read = (new ReadClipper(read)).revertSoftClippedBases();
read = ReadClipper.revertSoftClippedBases(read);
ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
//

View File

@ -286,7 +286,9 @@ public class ClippingOp {
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() - 1)
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop);

View File

@ -134,7 +134,8 @@ public class ReadClipper {
wasClipped = true;
ops.clear();
if ( clippedRead.isEmpty() )
return new GATKSAMRecord( clippedRead.getHeader() );
return GATKSAMRecord.emptyRead(clippedRead);
// return new GATKSAMRecord( clippedRead.getHeader() );
return clippedRead;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e); // this should never happen
@ -151,7 +152,7 @@ public class ReadClipper {
* @return a new read, without the left tail.
*/
@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
private GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
return hardClipByReferenceCoordinates(-1, refStop);
}
public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) {
@ -168,7 +169,7 @@ public class ReadClipper {
* @return a new read, without the right tail.
*/
@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
public GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
private GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
return hardClipByReferenceCoordinates(refStart, -1);
}
public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) {
@ -184,9 +185,10 @@ public class ReadClipper {
*/
@Requires({"start >= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read
"start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
@ -208,15 +210,17 @@ public class ReadClipper {
@Requires({"left <= right", // tails cannot overlap
"left >= read.getAlignmentStart()", // coordinate has to be within the mapped read
"right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read
public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
if (read.isEmpty() || left == right)
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
// make the left cut index no longer part of the read. In that case, clip the read entirely.
if (left > leftTailRead.getAlignmentEnd())
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
ReadClipper clipper = new ReadClipper(leftTailRead);
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
@ -235,7 +239,7 @@ public class ReadClipper {
* @param lowQual every base quality lower than or equal to this in the tail of the read will be hard clipped
* @return a new read without low quality tails
*/
public GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
private GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
if (read.isEmpty())
return read;
@ -249,7 +253,8 @@ public class ReadClipper {
// if the entire read should be clipped, then return an empty read.
if (leftClipIndex > rightClipIndex)
return (new GATKSAMRecord(read.getHeader()));
return GATKSAMRecord.emptyRead(read);
// return (new GATKSAMRecord(read.getHeader()));
if (rightClipIndex < read.getReadLength() - 1) {
this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1));
@ -269,7 +274,7 @@ public class ReadClipper {
*
* @return a new read without the soft clipped bases
*/
public GATKSAMRecord hardClipSoftClippedBases () {
private GATKSAMRecord hardClipSoftClippedBases () {
if (read.isEmpty())
return read;
@ -314,7 +319,7 @@ public class ReadClipper {
*
* @return a new read without adaptor sequence
*/
public GATKSAMRecord hardClipAdaptorSequence () {
private GATKSAMRecord hardClipAdaptorSequence () {
final Integer adaptorBoundary = ReadUtils.getAdaptorBoundary(read);
if (adaptorBoundary == null || !ReadUtils.isInsideRead(read, adaptorBoundary))
@ -332,7 +337,7 @@ public class ReadClipper {
*
* @return a new read without leading insertions
*/
public GATKSAMRecord hardClipLeadingInsertions() {
private GATKSAMRecord hardClipLeadingInsertions() {
if (read.isEmpty())
return read;
@ -357,7 +362,7 @@ public class ReadClipper {
*
* @return a new read with every soft clip turned into a match
*/
public GATKSAMRecord revertSoftClippedBases() {
private GATKSAMRecord revertSoftClippedBases() {
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
}
@ -379,7 +384,8 @@ public class ReadClipper {
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");

View File

@ -45,7 +45,6 @@ import java.util.Map;
*/
public class GATKSAMRecord extends BAMRecord {
public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
public static final String REDUCED_READ_FILTERED_TAG = "RF";
// the SAMRecord data we're caching
private String mReadString = null;
@ -317,6 +316,46 @@ public class GATKSAMRecord extends BAMRecord {
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Creates an empty GATKSAMRecord with the read's header, read group and mate
* information, but empty (not-null) fields:
* - Cigar String
* - Read Bases
* - Base Qualities
*
* Use this method if you want to create a new empty GATKSAMRecord based on
* another GATKSAMRecord
*
* @param read
* @return
*/
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
read.getReferenceIndex(),
0,
(short) 0,
(short) 0,
0,
0,
read.getFlags(),
0,
read.getMateReferenceIndex(),
read.getMateAlignmentStart(),
read.getInferredInsertSize(),
null);
emptyRead.setCigarString("");
emptyRead.setReadBases(new byte[0]);
emptyRead.setBaseQualities(new byte[0]);
SAMReadGroupRecord samRG = read.getReadGroup();
emptyRead.clearAttributes();
if (samRG != null) {
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
emptyRead.setReadGroup(rg);
}
return emptyRead;
}
}

View File

@ -60,7 +60,7 @@ public class ReadClipperUnitTest extends BaseTest {
int alnEnd = read.getAlignmentEnd();
int readLength = alnStart - alnEnd;
for (int i=0; i<readLength/2; i++) {
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(alnStart + i, alnEnd - i);
GATKSAMRecord clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + i, alnEnd - i);
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
}
@ -73,10 +73,10 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength();
for (int i=0; i<readLength; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReadCoordinates(0, i);
GATKSAMRecord clipLeft = ReadClipper.hardClipByReadCoordinates(read, 0, i);
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1);
GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength-1);
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
}
}
@ -112,7 +112,7 @@ public class ReadClipperUnitTest extends BaseTest {
int alnEnd = read.getAlignmentEnd();
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
if (!clipLeft.isEmpty())
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
}
@ -126,9 +126,9 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
}
@ -154,7 +154,7 @@ public class ReadClipperUnitTest extends BaseTest {
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
quals[addLeft] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
// Tests
@ -162,14 +162,14 @@ public class ReadClipperUnitTest extends BaseTest {
assertNoLowQualBases(clipLeft, LOW_QUAL);
// Can't run this test with the current contract of no hanging insertions
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
// Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
// create a read with nLowQualBases in the right tail
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addRight = 0; addRight < nLowQualBases; addRight++)
quals[readLength - addRight - 1] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
// Tests
@ -187,7 +187,7 @@ public class ReadClipperUnitTest extends BaseTest {
quals[readLength - addBoth - 1] = LOW_QUAL;
}
read.setBaseQualities(quals);
GATKSAMRecord clipBoth = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
// Tests
@ -214,8 +214,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
ReadClipper lowQualClipper = new ReadClipper(read);
ReadClipperTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
ReadClipperTestUtils.assertEqualReads(ReadClipper.hardClipLowQualEnds(read, (byte) 2), expected);
}
@Test(enabled = true)
@ -224,8 +223,7 @@ public class ReadClipperUnitTest extends BaseTest {
// Generate a list of cigars to test
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
ReadClipper readClipper = new ReadClipper(read);
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
int sumHardClips = 0;
int sumMatches = 0;
@ -276,7 +274,7 @@ public class ReadClipperUnitTest extends BaseTest {
for (Cigar cigar : cigarList) {
if (startsWithInsertion(cigar)) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
@ -300,7 +298,7 @@ public class ReadClipperUnitTest extends BaseTest {
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
final GATKSAMRecord unclipped = (new ReadClipper(read)).revertSoftClippedBases();
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
if ( leadingSoftClips > 0 || tailSoftClips > 0) {
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;