Fixed HardClipping and Interval containment
* Hard clipping was wrongfully hard clipping unmapped reads while soft clipping then hard clipping mapped reads. Now we throw exception if we try to hard/soft clip unmapped reads and use the soft->hard clip procedure fore every mapped read. * Interval containment needed a <= and >= to make sure it caught the borders right.
This commit is contained in:
parent
0be1dacddb
commit
291d8c7596
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.Vector;
|
||||
|
|
@ -72,48 +73,45 @@ public class ClippingOp {
|
|||
break;
|
||||
case HARDCLIP_BASES:
|
||||
case SOFTCLIP_BASES:
|
||||
if ( ! clippedRead.getReadUnmappedFlag() ) {
|
||||
if ( clippedRead.getReadUnmappedFlag() ) {
|
||||
// we can't process unmapped reads
|
||||
|
||||
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
|
||||
int myStop = stop;
|
||||
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
|
||||
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
||||
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
|
||||
//break;
|
||||
myStop--; // just decrement stop
|
||||
}
|
||||
|
||||
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
|
||||
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
|
||||
clippedRead.getReadName(), start, myStop));
|
||||
|
||||
Cigar oldCigar = clippedRead.getCigar();
|
||||
|
||||
int scLeft = 0, scRight = clippedRead.getReadLength();
|
||||
if ( start == 0 )
|
||||
scLeft = myStop + 1;
|
||||
else
|
||||
scRight = start;
|
||||
|
||||
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
|
||||
clippedRead.setCigar(newCigar);
|
||||
|
||||
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
|
||||
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
|
||||
clippedRead.setAlignmentStart(newStart);
|
||||
|
||||
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
|
||||
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
|
||||
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
|
||||
} else if ( algorithm == ClippingRepresentation.HARDCLIP_BASES ) {
|
||||
// we can hard clip unmapped reads
|
||||
if ( clippedRead.getReadNegativeStrandFlag() )
|
||||
clippedRead = ReadUtils.hardClipBases(clippedRead, 0, start, null);
|
||||
else
|
||||
clippedRead = ReadUtils.hardClipBases(clippedRead, start, start + getLength(), null);
|
||||
throw new UserException("Read Clipper cannot soft/hard clip unmapped reads");
|
||||
}
|
||||
|
||||
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
|
||||
int myStop = stop;
|
||||
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
|
||||
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
||||
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
|
||||
//break;
|
||||
myStop--; // just decrement stop
|
||||
}
|
||||
|
||||
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
|
||||
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
|
||||
clippedRead.getReadName(), start, myStop));
|
||||
|
||||
Cigar oldCigar = clippedRead.getCigar();
|
||||
|
||||
int scLeft = 0, scRight = clippedRead.getReadLength();
|
||||
if ( start == 0 )
|
||||
scLeft = myStop + 1;
|
||||
else
|
||||
scRight = start;
|
||||
|
||||
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
|
||||
clippedRead.setCigar(newCigar);
|
||||
|
||||
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
|
||||
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
|
||||
clippedRead.setAlignmentStart(newStart);
|
||||
|
||||
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
|
||||
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
|
||||
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -618,8 +618,8 @@ public class ReadUtils {
|
|||
(read.getUnclippedStart() > interval.getStop()) )
|
||||
return ReadAndIntervalOverlap.NO_OVERLAP;
|
||||
|
||||
else if ( (read.getUnclippedStart() > interval.getStart()) &&
|
||||
(read.getUnclippedEnd() < interval.getStop()) )
|
||||
else if ( (read.getUnclippedStart() >= interval.getStart()) &&
|
||||
(read.getUnclippedEnd() <= interval.getStop()) )
|
||||
return ReadAndIntervalOverlap.CONTAINED;
|
||||
|
||||
else if ( (read.getUnclippedStart() < interval.getStart()) &&
|
||||
|
|
|
|||
Loading…
Reference in New Issue