Fixed HardClipping and Interval containment

* Hard clipping was incorrectly hard clipping unmapped reads, while soft clipping and then hard clipping mapped reads. Now we throw an exception if we try to hard/soft clip unmapped reads, and use the soft->hard clip procedure for every mapped read.

 * Interval containment needed <= and >= (instead of < and >) so that reads starting or ending exactly on the interval borders are classified as contained.
This commit is contained in:
Mauricio Carneiro 2011-08-13 21:02:24 -04:00
parent 0be1dacddb
commit 291d8c7596
2 changed files with 39 additions and 41 deletions

View File

@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.Vector;
@ -72,48 +73,45 @@ public class ClippingOp {
break;
case HARDCLIP_BASES:
case SOFTCLIP_BASES:
if ( ! clippedRead.getReadUnmappedFlag() ) {
if ( clippedRead.getReadUnmappedFlag() ) {
// we can't process unmapped reads
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
int myStop = stop;
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
//break;
myStop--; // just decrement stop
}
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
clippedRead.getReadName(), start, myStop));
Cigar oldCigar = clippedRead.getCigar();
int scLeft = 0, scRight = clippedRead.getReadLength();
if ( start == 0 )
scLeft = myStop + 1;
else
scRight = start;
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
clippedRead.setCigar(newCigar);
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
clippedRead.setAlignmentStart(newStart);
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
} else if ( algorithm == ClippingRepresentation.HARDCLIP_BASES ) {
// we can hard clip unmapped reads
if ( clippedRead.getReadNegativeStrandFlag() )
clippedRead = ReadUtils.hardClipBases(clippedRead, 0, start, null);
else
clippedRead = ReadUtils.hardClipBases(clippedRead, start, start + getLength(), null);
throw new UserException("Read Clipper cannot soft/hard clip unmapped reads");
}
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
int myStop = stop;
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
//break;
myStop--; // just decrement stop
}
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
clippedRead.getReadName(), start, myStop));
Cigar oldCigar = clippedRead.getCigar();
int scLeft = 0, scRight = clippedRead.getReadLength();
if ( start == 0 )
scLeft = myStop + 1;
else
scRight = start;
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
clippedRead.setCigar(newCigar);
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
clippedRead.setAlignmentStart(newStart);
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
break;
default:
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
}

View File

@ -618,8 +618,8 @@ public class ReadUtils {
(read.getUnclippedStart() > interval.getStop()) )
return ReadAndIntervalOverlap.NO_OVERLAP;
else if ( (read.getUnclippedStart() > interval.getStart()) &&
(read.getUnclippedEnd() < interval.getStop()) )
else if ( (read.getUnclippedStart() >= interval.getStart()) &&
(read.getUnclippedEnd() <= interval.getStop()) )
return ReadAndIntervalOverlap.CONTAINED;
else if ( (read.getUnclippedStart() < interval.getStart()) &&