Soft-clipping support in the clip reads walker. Minor improvement to WalkerTest -- you can now specify file extensions for tmp files. Matt -- I couldn't easily create a non-presorted SAM file. The softclipper has an impact on this.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1878 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-10-19 21:54:53 +00:00
parent 055a99fb05
commit 2a26bb42dd
3 changed files with 297 additions and 91 deletions

View File

@ -1,8 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.*;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
@ -15,10 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;
@ -58,29 +53,31 @@ import net.sf.samtools.util.StringUtil;
*/
@Requires({DataSource.READS})
public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, ClipReadsWalker.ClippingData> {
/** an optional argument to dump the reads out to a BAM file */
/**
* an optional argument to dump the reads out to a BAM file
*/
@Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false)
SAMFileWriter outputBam = null;
String outputBamFile = null;
@Argument(fullName = "", shortName = "STD", doc="FOR DEBUGGING ONLY", required = false)
boolean toStandardOut = false;
@Argument(fullName = "", shortName = "STD", doc = "FOR DEBUGGING ONLY", required = false)
boolean toStandardOut = false;
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc="", required = false)
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false)
int qTrimmingThreshold = -1;
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc="String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false)
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false)
String cyclesToClipArg = null;
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc="Remove sequences within reads matching these sequences", required = false)
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching these sequences", required = false)
String clipSequenceFile = null;
@Argument(fullName = "clipSequence", shortName = "X", doc="Remove sequences within reads matching this sequence", required = false)
@Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false)
String[] clipSequencesArgs = null;
// @Argument(fullName = "onlyClipFirstSeqMatch", shortName = "ESC", doc="Only clip the first occurrence of a clipping sequence, rather than all subsequences within a read that match", required = false)
// boolean onlyClipFirstSeqMatch = false;
@Argument(fullName = "clipRepresentation", shortName = "CR", doc="How should we actually clip the bases?", required = false)
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
/**
@ -97,28 +94,28 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* The initialize function.
*/
public void initialize() {
if ( qTrimmingThreshold >= 0 ) {
if (qTrimmingThreshold >= 0) {
logger.info(String.format("Creating Q-score clipper with threshold %d", qTrimmingThreshold));
}
//
// Initialize the sequences to clip
//
if ( clipSequencesArgs != null ) {
if (clipSequencesArgs != null) {
int i = 0;
for ( String toClip : clipSequencesArgs ) {
for (String toClip : clipSequencesArgs) {
i++;
ReferenceSequence rs = new ReferenceSequence("CMDLINE-"+i, -1, StringUtil.stringToBytes(toClip));
ReferenceSequence rs = new ReferenceSequence("CMDLINE-" + i, -1, StringUtil.stringToBytes(toClip));
addSeqToClip(rs.getName(), rs.getBases());
}
}
if ( clipSequenceFile != null ) {
if (clipSequenceFile != null) {
ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(clipSequenceFile));
while ( true ) {
while (true) {
ReferenceSequence rs = rsf.nextSequence();
if ( rs == null )
if (rs == null)
break;
else {
addSeqToClip(rs.getName(), rs.getBases());
@ -127,24 +124,23 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
}
//
// Initialize the cycle ranges to clip
//
if ( cyclesToClipArg != null ) {
if (cyclesToClipArg != null) {
cyclesToClip = new ArrayList<Pair<Integer, Integer>>();
for ( String range : cyclesToClipArg.split(",") ) {
for (String range : cyclesToClipArg.split(",")) {
try {
String[] elts = range.split("-");
int start = Integer.parseInt(elts[0]) - 1;
int stop = Integer.parseInt(elts[1]) - 1;
if ( start < 0 ) throw new Exception();
if ( stop < start ) throw new Exception();
if (start < 0) throw new Exception();
if (stop < start) throw new Exception();
logger.info(String.format("Creating cycle clipper %d-%d", start, stop));
cyclesToClip.add(new Pair<Integer, Integer>(start, stop));
} catch ( Exception e ) {
} catch (Exception e) {
throw new RuntimeException("Badly formatted cyclesToClip argument: " + cyclesToClipArg);
}
}
@ -153,7 +149,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* Helper function that adds a seq with name and bases (as bytes) to the list of sequences to be clipped
*
*
* @param name
* @param bases
*/
@ -165,11 +161,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* The reads map function.
* @param ref the reference bases that correspond to our read, if a reference was provided
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return the ReadClipper object describing what should be done to clip this read
*/
public ReadClipper map( char[] ref, SAMRecord read ) {
public ReadClipper map(char[] ref, SAMRecord read) {
ReadClipper clipper = new ReadClipper(read);
//
@ -189,10 +186,10 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* @param clipper
*/
private void clipSequences(ReadClipper clipper) {
if ( sequencesToClip != null ) { // don't bother if we don't have any sequences to clip
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
SAMRecord read = clipper.getRead();
for ( SeqToClip stc : sequencesToClip ) {
for (SeqToClip stc : sequencesToClip) {
// we have a pattern for both the forward and the reverse strands
Pattern pattern = read.getReadNegativeStrandFlag() ? stc.revPat : stc.fwdPat;
String bases = read.getReadString();
@ -200,10 +197,10 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
// keep clipping until match.find() says it can't find anything else
boolean found = true; // go through at least once
while ( found ) {
while (found) {
found = match.find();
//System.out.printf("Matching %s against %s/%s => %b%n", bases, stc.seq, stc.revSeq, found);
if ( found ) {
if (found) {
int start = match.start();
int stop = match.end() - 1;
ClippingOp op = new ClippingOp(ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
@ -223,9 +220,9 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* @param stop
* @return
*/
private Pair<Integer, Integer> strandAwarePositions( SAMRecord read, int start, int stop ) {
if ( read.getReadNegativeStrandFlag() )
return new Pair<Integer, Integer>( read.getReadLength() - stop - 1, read.getReadLength() - start - 1 );
private Pair<Integer, Integer> strandAwarePositions(SAMRecord read, int start, int stop) {
if (read.getReadNegativeStrandFlag())
return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1);
else
return new Pair<Integer, Integer>(start, stop);
}
@ -236,20 +233,20 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* @param clipper
*/
private void clipCycles(ReadClipper clipper) {
if ( cyclesToClip != null ) {
if (cyclesToClip != null) {
SAMRecord read = clipper.getRead();
for ( Pair<Integer, Integer> p : cyclesToClip ) { // iterate over each cycle range
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
int cycleStart = p.first;
int cycleStop = p.second;
if ( cycleStart < read.getReadLength() ) {
if (cycleStart < read.getReadLength()) {
// only try to clip if the cycleStart is less than the read's length
if ( cycleStop >= read.getReadLength() )
if (cycleStop >= read.getReadLength())
// we do tolerate [for convenience) clipping when the stop is beyond the end of the read
cycleStop = read.getReadLength() - 1;
Pair<Integer, Integer> startStop = strandAwarePositions( read, cycleStart, cycleStop );
Pair<Integer, Integer> startStop = strandAwarePositions(read, cycleStart, cycleStop);
int start = startStop.first;
int stop = startStop.second;
@ -262,11 +259,11 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* Clip bases from the read in clipper from
*
* <p/>
* argmax_x{ \sum{i = x + 1}^l (qTrimmingThreshold - qual)
*
* <p/>
* to the end of the read. This is blatantly stolen from BWA.
*
* <p/>
* Walk through the read from the end (in machine cycle order) to the beginning, calculating the
* running sum of qTrimmingThreshold - qual. While we do this, we track the maximum value of this
* sum where the delta > 0. After the loop, clipPoint is either -1 (don't do anything) or the
@ -281,17 +278,17 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
int clipSum = 0, lastMax = -1, clipPoint = -1; // -1 means no clip
for ( int i = readLen - 1; i >= 0; i-- ) {
for (int i = readLen - 1; i >= 0; i--) {
int baseIndex = read.getReadNegativeStrandFlag() ? readLen - i - 1 : i;
byte qual = quals[baseIndex];
clipSum += (qTrimmingThreshold - qual);
if ( clipSum >= 0 && ( clipSum >= lastMax ) ) {
if (clipSum >= 0 && (clipSum >= lastMax)) {
lastMax = clipSum;
clipPoint = baseIndex;
}
}
if ( clipPoint != -1 ) {
if (clipPoint != -1) {
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
clipper.addOp(new ClippingOp(ClippingType.LOW_Q_SCORES, start, stop, null));
@ -301,33 +298,41 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* reduceInit is called once before any calls to the map function. We use it here to setup the output
* bam file, if it was specified on the command line
* @return
*
* @return
*/
public ClippingData reduceInit() {
    // A BAM writer is opened only when an output file was named and the STDOUT debugging switch is off;
    // otherwise the accumulator carries a null writer.
    final boolean wantsBamOutput = !toStandardOut && outputBamFile != null;

    SAMFileWriter bamWriter = null;
    if (wantsBamOutput) {
        bamWriter = Utils.createSAMFileWriterWithCompression(
                this.getToolkit().getSAMFileHeader(), false, outputBamFile, 5);
    }

    // The accumulator also receives the clip sequences so it can keep a per-sequence tally
    return new ClippingData(bamWriter, sequencesToClip);
}
public ClippingData reduce( ReadClipper clipper, ClippingData data ) {
public ClippingData reduce(ReadClipper clipper, ClippingData data) {
if (data.output != null) {
data.output.addAlignment(clipper.clipRead(clippingRepresentation));
} else if ( toStandardOut ) {
} else if (toStandardOut) {
out.println(clipper.clipRead(clippingRepresentation).format());
}
data.nTotalReads++;
data.nTotalBases += clipper.getRead().getReadLength();
if ( clipper.wasClipped() ) {
if (clipper.wasClipped()) {
data.nClippedReads++;
for ( ClippingOp op : clipper.getOps() ) {
switch ( op.type ) {
for (ClippingOp op : clipper.getOps()) {
switch (op.type) {
case LOW_Q_SCORES:
data.incNQClippedBases( op.getLength() );
data.incNQClippedBases(op.getLength());
break;
case WITHIN_CLIP_RANGE:
data.incNRangeClippedBases( op.getLength() );
data.incNRangeClippedBases(op.getLength());
break;
case MATCHES_CLIP_SEQ:
data.incSeqClippedBases( (String)op.extraInfo, op.getLength() );
data.incSeqClippedBases((String) op.extraInfo, op.getLength());
break;
}
}
@ -336,7 +341,10 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
return data;
}
public void onTraversalDone( ClippingData data ) {
public void onTraversalDone(ClippingData data) {
    // Close the BAM writer carried by the accumulator, if there is one
    if (data.output != null) {
        data.output.close();
    }

    // The summary is already fully formatted by ClippingData.toString(), so it must be written
    // verbatim. Passing it to printf would treat it as a format string and fail on any literal
    // '%' it happens to contain (for instance inside a user-supplied clip sequence).
    out.print(data.toString());
}
@ -372,7 +380,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* Represents a clip on a read. It has a type (see the enum) along with a start and stop in the bases
* of the read, plus an option extraInfo (useful for carrying info where needed).
*
* <p/>
* Also holds the critical apply function that actually execute the clipping operation on a provided read,
* according to the wishes of the supplid ClippingAlgorithm enum.
*/
@ -381,14 +389,16 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
public int start, stop; // inclusive
public Object extraInfo = null;
public ClippingOp(ClippingType type, int start, int stop, Object extraInfo ) {
public ClippingOp(ClippingType type, int start, int stop, Object extraInfo) {
this.type = type;
this.start = start;
this.stop = stop;
this.extraInfo = extraInfo;
}
public int getLength() { return stop - start + 1; }
public int getLength() {
return stop - start + 1;
}
/**
* Clips the bases in clippedRead according to this operation's start and stop. Uses the clipping
@ -398,17 +408,57 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* @param clippedRead
*/
public void apply(ClippingRepresentation algorithm, SAMRecord clippedRead) {
// Modifies clippedRead in place over the inclusive range start..stop, using the
// representation selected by algorithm.
switch ( algorithm ) {
//clippedRead.setReferenceIndex(1);
switch (algorithm) {
case WRITE_NS:
// overwrite the clipped bases with 'N'; qualities are left alone
for ( int i = start; i <= stop; i++ )
for (int i = start; i <= stop; i++)
clippedRead.getReadBases()[i] = 'N';
break;
case WRITE_Q0S:
// zero the qualities of the clipped bases; the bases themselves are left alone
for ( int i = start; i <= stop; i++ )
for (int i = start; i <= stop; i++)
clippedRead.getBaseQualities()[i] = 0;
break;
case WRITE_NS_Q0S:
// both of the above: write 'N' and a quality of 0 at every clipped position
for (int i = start; i <= stop; i++) {
clippedRead.getReadBases()[i] = 'N';
clippedRead.getBaseQualities()[i] = 0;
}
break;
case SOFTCLIP_BASES:
throw new RuntimeException("Softclipping of bases not yet implemented.");
if ( ! clippedRead.getReadUnmappedFlag() ) {
// we can't process unmapped reads
// A soft clip must touch one end of the read: either start is 0 or stop is the last base
if ( start > 0 && stop != clippedRead.getReadLength() - 1 )
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
clippedRead.getReadName(), start, stop));
Cigar oldCigar = clippedRead.getCigar();
// scLeft / scRight are handed to _softClip as "clip read bases before scLeft" and
// "clip read bases from scRight onwards"; the defaults (0, read length) clip nothing.
int scLeft = 0, scRight = clippedRead.getReadLength();
// NOTE(review): stop is inclusive (getLength() is stop - start + 1) and the WRITE_* branches
// index the same positions regardless of strand, yet the boundaries below differ by one
// between strands. With _softClip's half-open boundaries, "scLeft = stop" looks like it
// leaves base `stop` unclipped and "scRight = start + 1" looks like it leaves base `start`
// unclipped -- confirm whether the strand asymmetry is intended or an off-by-one.
if ( clippedRead.getReadNegativeStrandFlag() ) {
if ( start == 0 )
scLeft = stop + 1;
else
scRight = start + 1;
} else {
if ( start == 0 )
scLeft = stop;
else
scRight = start;
}
Cigar newCigar = _softClip(oldCigar, scLeft, scRight);
clippedRead.setCigar(newCigar);
// Clipping at the left end moves the first aligned base, so shift the alignment start by
// the number of reference bases the newly clipped prefix used to cover
int newClippedStart = _getNewAlignmentStartOffset(newCigar, oldCigar);
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
clippedRead.setAlignmentStart(newStart);
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
}
break;
//throw new RuntimeException("Softclipping of bases not yet implemented.");
}
}
}
@ -419,12 +469,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
private enum ClippingRepresentation {
WRITE_NS, // change the bases to Ns
WRITE_Q0S, // change the quality scores to Q0
WRITE_NS_Q0S, // change the quality scores to Q0 and write Ns
SOFTCLIP_BASES // change cigar string to S, but keep bases
}
/**
* A simple collection of the clipping operations to apply to a read along with its read
*
*/
public class ReadClipper {
SAMRecord read;
@ -432,6 +482,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* We didn't do any clipping work on this read, just leave everything as a default
*
* @param read
*/
public ReadClipper(final SAMRecord read) {
@ -440,16 +491,25 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* Add another clipping operation to apply to this read
*
* @param op
*/
public void addOp( ClippingOp op ) {
if ( ops == null ) ops = new ArrayList<ClippingOp>();
public void addOp(ClippingOp op) {
if (ops == null) ops = new ArrayList<ClippingOp>();
ops.add(op);
}
public List<ClippingOp> getOps() { return ops; }
public boolean wasClipped() { return ops != null; }
public SAMRecord getRead() { return read; }
public List<ClippingOp> getOps() {
return ops;
}
public boolean wasClipped() {
return ops != null;
}
public SAMRecord getRead() {
return read;
}
/**
@ -459,12 +519,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* @return
*/
public SAMRecord clipRead(ClippingRepresentation algorithm) {
if ( ops == null )
if (ops == null)
return getRead();
else {
try {
SAMRecord clippedRead = (SAMRecord)read.clone();
for ( ClippingOp op : getOps() ) {
SAMRecord clippedRead = (SAMRecord) read.clone();
for (ClippingOp op : getOps()) {
op.apply(algorithm, clippedRead);
}
return clippedRead;
@ -490,22 +550,22 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
public ClippingData(SAMFileWriter output, List<SeqToClip> clipSeqs) {
this.output = output;
for ( SeqToClip clipSeq : clipSeqs ) {
for (SeqToClip clipSeq : clipSeqs) {
seqClipCounts.put(clipSeq.seq, 0L);
}
}
public void incNQClippedBases( int n ) {
public void incNQClippedBases(int n) {
nQClippedBases += n;
nClippedBases += n;
}
public void incNRangeClippedBases( int n ) {
public void incNRangeClippedBases(int n) {
nRangeClippedBases += n;
nClippedBases += n;
}
public void incSeqClippedBases( final String seq, int n ) {
public void incSeqClippedBases(final String seq, int n) {
nSeqClippedBases += n;
nClippedBases += n;
seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
@ -517,20 +577,156 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
s.append(Utils.dupString('-', 80) + "\n");
s.append(String.format("Number of examined reads %d%n", nTotalReads));
s.append(String.format("Number of clipped reads %d%n", nClippedReads));
s.append(String.format("Percent of clipped reads %.2f%n", (100.0*nClippedReads) / nTotalReads));
s.append(String.format("Percent of clipped reads %.2f%n", (100.0 * nClippedReads) / nTotalReads));
s.append(String.format("Number of examined bases %d%n", nTotalBases));
s.append(String.format("Number of clipped bases %d%n", nClippedBases));
s.append(String.format("Percent of clipped bases %.2f%n", (100.0*nClippedBases) / nTotalBases));
s.append(String.format("Percent of clipped bases %.2f%n", (100.0 * nClippedBases) / nTotalBases));
s.append(String.format("Number of quality-score clipped bases %d%n", nQClippedBases));
s.append(String.format("Number of range clipped bases %d%n", nRangeClippedBases));
s.append(String.format("Number of sequence clipped bases %d%n", nSeqClippedBases));
for ( Map.Entry<String, Long> elt : seqClipCounts.entrySet() ) {
s.append(String.format(" %8d clip sites matching %s%n", elt.getValue(), elt.getKey() ));
for (Map.Entry<String, Long> elt : seqClipCounts.entrySet()) {
s.append(String.format(" %8d clip sites matching %s%n", elt.getValue(), elt.getKey()));
}
s.append(Utils.dupString('-', 80) + "\n");
return s.toString();
}
}
/**
 * Computes how far the alignment start has to move once a read has been re-clipped.
 * <p/>
 * First counts the read bases at the front of the new cigar that precede its first
 * reference-consuming element. It then walks the old cigar and adds up the reference
 * bases that those same leading read bases used to cover.
 *
 * @param __cigar    the cigar after clipping
 * @param __oldCigar the cigar before clipping
 * @return the number of reference bases to add to the old alignment start
 */
private int _getNewAlignmentStartOffset(final Cigar __cigar, final Cigar __oldCigar) {
    // Read bases sitting in front of the first reference-consuming element of the new cigar
    int leadingReadBases = 0;
    for (CigarElement element : __cigar.getCigarElements()) {
        if (element.getOperator().consumesReferenceBases()) {
            break;
        }
        if (element.getOperator().consumesReadBases()) {
            leadingReadBases += element.getLength();
        }
    }

    // Walk the old cigar until that many read bases have been accounted for, summing the
    // reference bases passed along the way
    int referenceOffset = 0;
    int readBasesSeen = 0;
    for (CigarElement element : __oldCigar.getCigarElements()) {
        int readSpan = element.getOperator().consumesReadBases() ? element.getLength() : 0;
        int referenceSpan = element.getLength();

        // This element runs past the clipped prefix: only the part inside the prefix counts
        final boolean overshoots = readBasesSeen + readSpan > leadingReadBases;
        if (overshoots) {
            readSpan = leadingReadBases - readBasesSeen;
            referenceSpan = readSpan;
        }

        if (!element.getOperator().consumesReferenceBases()) {
            referenceSpan = 0;
        }

        readBasesSeen += readSpan;
        referenceOffset += referenceSpan;

        // readBasesSeen can never exceed leadingReadBases, so overshooting is the only exit
        if (overshoots) {
            break;
        }
    }

    return referenceOffset;
}
/**
 * Builds a new cigar in which the read bases before {@code __startClipEnd} and the read bases
 * at or after {@code __endClipBegin} are soft clipped. Positions are 0-based offsets into the
 * read, counted over the elements that consume read bases.
 * <p/>
 * If the two boundaries meet or cross, the whole read is soft clipped. The resulting single
 * S element spans only the read-consuming elements of the input; elements that consume no
 * read bases must not contribute to its length.
 * <p/>
 * An element that consumes no read bases is kept only when it lies strictly inside the
 * retained region; one sitting exactly on a boundary, or at offset 0, is dropped.
 * Adjacent elements with the same operator are merged before the cigar is returned.
 *
 * @param __cigar        the cigar to clip
 * @param __startClipEnd read offset at which the left soft clip ends (exclusive); 0 for none
 * @param __endClipBegin read offset at which the right soft clip begins; read length for none
 * @return the clipped cigar
 */
private Cigar _softClip(final Cigar __cigar, final int __startClipEnd, final int __endClipBegin) {
    if (__endClipBegin <= __startClipEnd) {
        // whole thing should be soft clipped: count read bases only, not every element length
        int readLength = 0;
        for (CigarElement e : __cigar.getCigarElements()) {
            if (e.getOperator().consumesReadBases()) {
                readLength += e.getLength();
            }
        }

        Cigar newCigar = new Cigar();
        newCigar.add(new CigarElement(readLength, CigarOperator.SOFT_CLIP));
        assert newCigar.isValid(null, -1) == null;
        return newCigar;
    }

    int curLength = 0; // read bases consumed so far
    List<CigarElement> newElements = new ArrayList<CigarElement>();
    for (CigarElement curElem : __cigar.getCigarElements()) {
        if (!curElem.getOperator().consumesReadBases()) {
            // keep it only when strictly inside the retained region
            if (curLength > __startClipEnd && curLength < __endClipBegin) {
                newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
            }
            continue;
        }

        final int s = curLength;                       // first read offset covered by this element
        final int e = curLength + curElem.getLength(); // one past the last read offset covered

        if (e <= __startClipEnd || s >= __endClipBegin) {
            // entirely inside a clipped region: the whole element becomes a soft clip
            newElements.add(new CigarElement(curElem.getLength(), CigarOperator.SOFT_CLIP));
        } else if (s >= __startClipEnd && e <= __endClipBegin) {
            // entirely inside the retained region: copy unchanged
            newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
        } else {
            // a clip boundary falls inside this element: split into clip / retained / clip
            final int leftClip = s < __startClipEnd ? __startClipEnd - s : 0;
            final int rightClip = e > __endClipBegin ? e - __endClipBegin : 0;
            final int midLength = curElem.getLength() - leftClip - rightClip;
            assert midLength >= 0;

            if (leftClip > 0) {
                newElements.add(new CigarElement(leftClip, CigarOperator.SOFT_CLIP));
            }
            if (midLength > 0) {
                newElements.add(new CigarElement(midLength, curElem.getOperator()));
            }
            if (rightClip > 0) {
                newElements.add(new CigarElement(rightClip, CigarOperator.SOFT_CLIP));
            }
        }

        curLength += curElem.getLength();
    }

    // Merge runs of adjacent elements that share an operator (e.g. 3S followed by 2S becomes 5S)
    List<CigarElement> finalNewElements = new ArrayList<CigarElement>();
    CigarElement lastElement = null;
    for (CigarElement elem : newElements) {
        if (lastElement == null || lastElement.getOperator() != elem.getOperator()) {
            if (lastElement != null) {
                finalNewElements.add(lastElement);
            }
            lastElement = elem;
        } else {
            lastElement = new CigarElement(lastElement.getLength() + elem.getLength(), lastElement.getOperator());
        }
    }
    if (lastElement != null) {
        finalNewElements.add(lastElement);
    }

    Cigar newCigar = new Cigar(finalNewElements);
    assert newCigar.isValid(null, -1) == null;
    return newCigar;
}
}

View File

@ -85,12 +85,20 @@ public class WalkerTest extends BaseTest {
String args = "";
int nOutputFiles = -1;
List<String> md5s = null;
List<String> exts = null;
public WalkerTestSpec(String args, int nOutputFiles, List<String> md5s) {
this.args = args;
this.nOutputFiles = nOutputFiles;
this.md5s = md5s;
}
public WalkerTestSpec(String args, int nOutputFiles, List<String> exts, List<String> md5s) {
this.args = args;
this.nOutputFiles = nOutputFiles;
this.md5s = md5s;
this.exts = exts;
}
}
protected boolean parameterize() {
@ -101,7 +109,8 @@ public class WalkerTest extends BaseTest {
List<File> tmpFiles = new ArrayList<File>();
for ( int i = 0; i < spec.nOutputFiles; i++ ) {
try {
File fl = File.createTempFile(String.format("walktest.tmp_param.%d", i), ".tmp" );
String ext = spec.exts == null ? ".tmp" : "." + spec.exts.get(i);
File fl = File.createTempFile(String.format("walktest.tmp_param.%d", i), ext );
fl.deleteOnExit();
tmpFiles.add( fl );
} catch (IOException ex) {

View File

@ -18,12 +18,14 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
"-o %s " +
"-ob %s " + args,
2, // just one output file
Arrays.asList("tmp", "bam"),
Arrays.asList(md51, md52));
List<File> result = executeTest(name, spec).getFirst();
}
final static String Q10ClipOutput = "b29c5bc1cb9006ed9306d826a11d444f";
@Test public void testQClip0() { testClipper("clipQSum0", "-QT 0", "117a4760b54308f81789c39b1c9de578", "2465660bcd975a1dc6dfbf40a21bf6ad"); }
@Test public void testQClip2() { testClipper("clipQSum2", "-QT 2", "b29c5bc1cb9006ed9306d826a11d444f", "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testQClip2() { testClipper("clipQSum2", "-QT 2", Q10ClipOutput, "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testQClip10() { testClipper("clipQSum10", "-QT 10", "b29c5bc1cb9006ed9306d826a11d444f", "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testQClip20() { testClipper("clipQSum20", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "9a4b1c83c026ca83db00bb71999246cf"); }
@Test public void testQClip30() { testClipper("clipQSum30", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "9a4b1c83c026ca83db00bb71999246cf"); }
@ -36,8 +38,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF /humgen/gsa-scr1/GATK_Data/Validation_Data/seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "4a1153d6f0600cf53ff7959a043e57cc"); }
@Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", "b29c5bc1cb9006ed9306d826a11d444f", "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", "b29c5bc1cb9006ed9306d826a11d444f", "24053a87b00c0bc2ddf420975e9fea4d"); }
@Test (expected = Exception.class)
public void testClipSoft() { testClipper("testClipSoft", "-QT 10 -CR SOFTCLIP_BASES", "", ""); }
@Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "24053a87b00c0bc2ddf420975e9fea4d"); }
@Test public void testClipSoft() { testClipper("testClipSoft", "-QT 10 -CR SOFTCLIP_BASES", Q10ClipOutput, "aeb67cca75285a68af8a965faa547e7f"); }
}