New ReduceReads v2 with unclipped variant regions and soft-clipped bases

* Re-wrote the sliding window approach to allow the variant region not to clip the reads that overlap it.
   * Updated consensus to include only reads that were not passed on by the variant region, header counts are updated on the fly to avoid recompute
   * Added soft clipped bases to ReduceReads analysis by unclipping high quality soft-clips then re-clipping after reduce reads
   * Updated all integration tests
This commit is contained in:
Mauricio Carneiro 2012-05-14 13:39:42 -04:00
parent edf08ca928
commit 4aad7e23ef
3 changed files with 114 additions and 12 deletions

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -135,10 +136,9 @@ public class ReadClipper {
ops.clear();
if ( clippedRead.isEmpty() )
return GATKSAMRecord.emptyRead(clippedRead);
// return new GATKSAMRecord( clippedRead.getHeader() );
return clippedRead;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e); // this should never happen
throw new RuntimeException(e); // this should never happen
}
}
}
@ -188,7 +188,6 @@ public class ReadClipper {
private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
@ -213,14 +212,12 @@ public class ReadClipper {
private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
if (read.isEmpty() || left == right)
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
// make the left cut index no longer part of the read. In that case, clip the read entirely.
if (left > leftTailRead.getAlignmentEnd())
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
ReadClipper clipper = new ReadClipper(leftTailRead);
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
@ -402,17 +399,77 @@ public class ReadClipper {
/**
* Turns soft clipped bases into matches
*
* @return a new read with every soft clip turned into a match
*/
private GATKSAMRecord revertSoftClippedBases() {
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
if (read.isEmpty())
return read;
this.addOp(new ClippingOp(0, 0));
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
}
/**
* Reverts ALL soft-clipped bases
*
* @param read the read
* @return the read with all soft-clipped bases turned into matches
*/
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
return (new ReadClipper(read)).revertSoftClippedBases();
}
/**
* Reverts only soft clipped bases with quality score greater than or equal to minQual
*
* Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR')
*
* @param read the read
* @param minQual the mininum base quality score to revert the base (inclusive)
* @return the read with high quality soft clips reverted
*/
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) {
int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
if (read.isEmpty() || nLeadingSoftClips > read.getReadLength())
return GATKSAMRecord.emptyRead(read);
byte [] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
int left = -1;
if (nLeadingSoftClips > 0) {
for (int i = nLeadingSoftClips - 1; i >= 0; i--) {
if (quals[i] >= minQual)
left = i;
else
break;
}
}
int right = -1;
int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd();
if (nTailingSoftClips > 0) {
for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength() ; i++) {
if (quals[i] >= minQual)
right = i;
else
break;
}
}
GATKSAMRecord clippedRead = read;
if (right >= 0) {
if (right + 1 < clippedRead.getReadLength())
clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the left tail
clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1)); // keep track of how may bases to 're-softclip' after processing
}
if (left >= 0) {
if (left - 1 > 0)
clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the right tail
clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left); // keep track of how may bases to 're-softclip' after processing
}
return revertSoftClippedBases(clippedRead); // now that we have only good bases in the soft clips, we can revert them all
}
/**
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
@ -446,9 +503,6 @@ public class ReadClipper {
stop = read.getReadLength() - 1;
}
// if ((start == 0 && stop == read.getReadLength() - 1))
// return GATKSAMRecord.emptyRead(read);
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");

View File

@ -428,8 +428,8 @@ public class GATKSAMRecord extends BAMRecord {
* Use this method if you want to create a new empty GATKSAMRecord based on
* another GATKSAMRecord
*
* @param read
* @return
* @param read a read to copy the header from
* @return a read with no bases but safe for the GATK
*/
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),

View File

@ -0,0 +1,48 @@
package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import java.util.Comparator;
public class ReadUnclippedStartWithNoTiesComparator implements Comparator<SAMRecord> {
@Requires("c1 >= 0 && c2 >= 0")
@Ensures("result == 0 || result == 1 || result == -1")
private int compareContigs(int c1, int c2) {
if (c1 == c2)
return 0;
else if (c1 > c2)
return 1;
return -1;
}
@Requires("r1 != null && r2 != null")
@Ensures("result == 0 || result == 1 || result == -1")
public int compare(SAMRecord r1, SAMRecord r2) {
int result;
if (r1 == r2)
result = 0;
else if (r1.getReadUnmappedFlag())
result = 1;
else if (r2.getReadUnmappedFlag())
result = -1;
else {
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
if (cmpContig != 0)
result = cmpContig;
else {
if (r1.getUnclippedStart() < r2.getUnclippedStart())
result = -1;
else
result = 1;
}
}
return result;
}
}