Fixed HardClipping and Interval containment
* Hard clipping was wrongfully hard clipping unmapped reads while soft clipping then hard clipping mapped reads. Now we throw exception if we try to hard/soft clip unmapped reads and use the soft->hard clip procedure fore every mapped read. * Interval containment needed a <= and >= to make sure it caught the borders right.
This commit is contained in:
parent
0be1dacddb
commit
291d8c7596
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
|
|
@ -72,48 +73,45 @@ public class ClippingOp {
|
||||||
break;
|
break;
|
||||||
case HARDCLIP_BASES:
|
case HARDCLIP_BASES:
|
||||||
case SOFTCLIP_BASES:
|
case SOFTCLIP_BASES:
|
||||||
if ( ! clippedRead.getReadUnmappedFlag() ) {
|
if ( clippedRead.getReadUnmappedFlag() ) {
|
||||||
// we can't process unmapped reads
|
// we can't process unmapped reads
|
||||||
|
throw new UserException("Read Clipper cannot soft/hard clip unmapped reads");
|
||||||
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
|
|
||||||
int myStop = stop;
|
|
||||||
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
|
|
||||||
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
|
||||||
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
|
|
||||||
//break;
|
|
||||||
myStop--; // just decrement stop
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
|
|
||||||
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
|
|
||||||
clippedRead.getReadName(), start, myStop));
|
|
||||||
|
|
||||||
Cigar oldCigar = clippedRead.getCigar();
|
|
||||||
|
|
||||||
int scLeft = 0, scRight = clippedRead.getReadLength();
|
|
||||||
if ( start == 0 )
|
|
||||||
scLeft = myStop + 1;
|
|
||||||
else
|
|
||||||
scRight = start;
|
|
||||||
|
|
||||||
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
|
|
||||||
clippedRead.setCigar(newCigar);
|
|
||||||
|
|
||||||
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
|
|
||||||
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
|
|
||||||
clippedRead.setAlignmentStart(newStart);
|
|
||||||
|
|
||||||
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
|
|
||||||
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
|
|
||||||
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
|
|
||||||
} else if ( algorithm == ClippingRepresentation.HARDCLIP_BASES ) {
|
|
||||||
// we can hard clip unmapped reads
|
|
||||||
if ( clippedRead.getReadNegativeStrandFlag() )
|
|
||||||
clippedRead = ReadUtils.hardClipBases(clippedRead, 0, start, null);
|
|
||||||
else
|
|
||||||
clippedRead = ReadUtils.hardClipBases(clippedRead, start, start + getLength(), null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
|
||||||
|
int myStop = stop;
|
||||||
|
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
|
||||||
|
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
||||||
|
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
|
||||||
|
//break;
|
||||||
|
myStop--; // just decrement stop
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
|
||||||
|
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
|
||||||
|
clippedRead.getReadName(), start, myStop));
|
||||||
|
|
||||||
|
Cigar oldCigar = clippedRead.getCigar();
|
||||||
|
|
||||||
|
int scLeft = 0, scRight = clippedRead.getReadLength();
|
||||||
|
if ( start == 0 )
|
||||||
|
scLeft = myStop + 1;
|
||||||
|
else
|
||||||
|
scRight = start;
|
||||||
|
|
||||||
|
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
|
||||||
|
clippedRead.setCigar(newCigar);
|
||||||
|
|
||||||
|
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
|
||||||
|
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
|
||||||
|
clippedRead.setAlignmentStart(newStart);
|
||||||
|
|
||||||
|
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
|
||||||
|
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
|
||||||
|
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -618,8 +618,8 @@ public class ReadUtils {
|
||||||
(read.getUnclippedStart() > interval.getStop()) )
|
(read.getUnclippedStart() > interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP;
|
return ReadAndIntervalOverlap.NO_OVERLAP;
|
||||||
|
|
||||||
else if ( (read.getUnclippedStart() > interval.getStart()) &&
|
else if ( (read.getUnclippedStart() >= interval.getStart()) &&
|
||||||
(read.getUnclippedEnd() < interval.getStop()) )
|
(read.getUnclippedEnd() <= interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.CONTAINED;
|
return ReadAndIntervalOverlap.CONTAINED;
|
||||||
|
|
||||||
else if ( (read.getUnclippedStart() < interval.getStart()) &&
|
else if ( (read.getUnclippedStart() < interval.getStart()) &&
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue