Added low quality clipping

Clips both tails of a read if the tails are below a given quality threshold (default Q2).
*Added special treatment for reads that get completely clipped.
This commit is contained in:
Mauricio Carneiro 2011-08-16 13:51:25 -04:00
parent 993ecb85da
commit b135565183
2 changed files with 34 additions and 9 deletions

View File

@ -76,8 +76,8 @@ public class ReadClipper {
int rightClipIndex = read.getReadLength() - 1;
// check how far we can clip both sides
while (quals[rightClipIndex] <= lowQual) rightClipIndex--;
while (quals[leftClipIndex] <= lowQual) leftClipIndex++;
while (rightClipIndex >= 0 && quals[rightClipIndex] <= lowQual) rightClipIndex--;
while (leftClipIndex < read.getReadLength() && quals[leftClipIndex] <= lowQual) leftClipIndex++;
// if the entire read should be clipped, then return an empty read. (--todo: maybe null is better? testing this for now)
if (leftClipIndex > rightClipIndex)

View File

@ -630,26 +630,51 @@ public class ReadUtils {
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
*/
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
int start = getSoftUnclippedStart(read);
int stop = getSoftUnclippedStop(read);
if ( (!read.getReferenceName().equals(interval.getContig())) ||
(read.getUnclippedEnd() < interval.getStart()) ||
(read.getUnclippedStart() > interval.getStop()) )
(stop < interval.getStart()) ||
(start > interval.getStop()) )
return ReadAndIntervalOverlap.NO_OVERLAP;
else if ( (read.getUnclippedStart() >= interval.getStart()) &&
(read.getUnclippedEnd() <= interval.getStop()) )
else if ( (start >= interval.getStart()) &&
(stop <= interval.getStop()) )
return ReadAndIntervalOverlap.CONTAINED;
else if ( (read.getUnclippedStart() < interval.getStart()) &&
(read.getUnclippedEnd() > interval.getStop()) )
else if ( (start < interval.getStart()) &&
(stop > interval.getStop()) )
return ReadAndIntervalOverlap.FULL_OVERLAP;
else if ( (read.getAlignmentStart() < interval.getStart()) )
else if ( (start < interval.getStart()) )
return ReadAndIntervalOverlap.LEFT_OVERLAP;
else
return ReadAndIntervalOverlap.RIGHT_OVERLAP;
}
public static int getSoftUnclippedStart(SAMRecord read) {
int start = read.getUnclippedStart();
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
start += cigarElement.getLength();
else
break;
}
return start;
}
public static int getSoftUnclippedStop(SAMRecord read) {
int stop = getSoftUnclippedStart(read);
for (CigarElement cigarElement : read.getCigar().getCigarElements())
if (cigarElement.getOperator().consumesReadBases())
stop += cigarElement.getLength();
return stop;
}
@Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {