Added a naive ad-hoc cutoff for the pile size the cleaner will attempt to process; use the --maxPileSize argument to force any pile larger than the specified cutoff to be written directly to the output without cleaning.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@972 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f9be175f44
commit
030efc468f
|
|
@ -11,10 +11,7 @@ import org.broadinstitute.sting.playground.indels.*;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.FileWriter;
|
import java.io.FileWriter;
|
||||||
import java.io.OutputStream;
|
|
||||||
|
|
||||||
@WalkerName("IntervalCleaner")
|
@WalkerName("IntervalCleaner")
|
||||||
public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer> {
|
public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer> {
|
||||||
|
|
@ -30,6 +27,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
public String OUT_STATS = null;
|
public String OUT_STATS = null;
|
||||||
@Argument(fullName="LODThresholdForCleaning", shortName="LOD", doc="LOD threshold above which the cleaner will clean", required=false)
|
@Argument(fullName="LODThresholdForCleaning", shortName="LOD", doc="LOD threshold above which the cleaner will clean", required=false)
|
||||||
public double LOD_THRESHOLD = 5.0;
|
public double LOD_THRESHOLD = 5.0;
|
||||||
|
@Argument(fullName="maxPileSize", shortName="maxSize", doc="max number of reads in the pile; if exceeded, no attempt will be made to realign the pile", required=false)
|
||||||
|
public int maxPileSize = 1000000000;
|
||||||
@Argument(fullName="EntropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
|
@Argument(fullName="EntropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
|
||||||
public double MISMATCH_THRESHOLD = 0.25;
|
public double MISMATCH_THRESHOLD = 0.25;
|
||||||
@Argument(fullName="GreedyThreshold", shortName="greedy", doc="coverage above which the cleaner turns on greedy mode to improve performance", required=false)
|
@Argument(fullName="GreedyThreshold", shortName="greedy", doc="coverage above which the cleaner turns on greedy mode to improve performance", required=false)
|
||||||
|
|
@ -109,7 +108,28 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
else
|
else
|
||||||
readsToWrite.add(new ComparableSAMRecord(read));
|
readsToWrite.add(new ComparableSAMRecord(read));
|
||||||
}
|
}
|
||||||
clean(goodReads, ref, context.getLocation());
|
|
||||||
|
if ( goodReads.size() > maxPileSize ) {
|
||||||
|
// too many reads, shy away!
|
||||||
|
|
||||||
|
if ( statsOutput != null ) {
|
||||||
|
try {
|
||||||
|
statsOutput.write(context.getLocation().toString());
|
||||||
|
statsOutput.write("\tSKIPPED ("+reads.size()+" reads total, "+goodReads.size()+" for realignment)\t");
|
||||||
|
statsOutput.write("-1.0");
|
||||||
|
statsOutput.write("\n");
|
||||||
|
statsOutput.flush();
|
||||||
|
} catch (Exception e) {}
|
||||||
|
|
||||||
|
}
|
||||||
|
// push all "good" reads into readsToWrite without cleaning, there are too many!
|
||||||
|
for ( SAMRecord read : goodReads ) {
|
||||||
|
readsToWrite.add(new ComparableSAMRecord(read));
|
||||||
|
}
|
||||||
|
goodReads.clear();
|
||||||
|
} else {
|
||||||
|
clean(goodReads, ref, context.getLocation());
|
||||||
|
}
|
||||||
//bruteForceClean(goodReads, ref, context.getLocation().getStart());
|
//bruteForceClean(goodReads, ref, context.getLocation().getStart());
|
||||||
//testCleanWithDeletion();
|
//testCleanWithDeletion();
|
||||||
//testCleanWithInsertion();
|
//testCleanWithInsertion();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue