Better method for downsampling deep regions
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@983 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4d9a88153a
commit
599ceeddd8
|
|
@@ -271,8 +271,19 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
// if we have too many reads with mismatches, be greedy
|
// if we have too many reads with mismatches, be greedy
|
||||||
if ( altReads.size() > GREEDY_THRESHOLD) {
|
if ( altReads.size() > GREEDY_THRESHOLD) {
|
||||||
logger.debug("Downsampling from " + altReads.size() + " to " + GREEDY_THRESHOLD + " mismatching reads");
|
logger.debug("Downsampling from " + altReads.size() + " to " + GREEDY_THRESHOLD + " mismatching reads");
|
||||||
//sortByGreedy();
|
// the best thing to do here is to randomly sample from the reads
|
||||||
for ( int i = GREEDY_THRESHOLD; i < altReads.size(); i++)
|
// however, we definitely do want to keep the clean indel-containing reads
|
||||||
|
// (which were purposely placed at the beginning of the list)
|
||||||
|
int downsampleTo = GREEDY_THRESHOLD - priorIndelsToTest.size();
|
||||||
|
int sampleRate = (altReads.size() - priorIndelsToTest.size()) / downsampleTo;
|
||||||
|
for ( int i = 0; i < downsampleTo; i++) {
|
||||||
|
int index = priorIndelsToTest.size() + (i * sampleRate);
|
||||||
|
for ( int j = 1; j < sampleRate; j++)
|
||||||
|
altAlignmentsToTest.set(index+j, false);
|
||||||
|
}
|
||||||
|
// also get the trailing reads
|
||||||
|
int tail = priorIndelsToTest.size() + (downsampleTo * sampleRate);
|
||||||
|
for ( int i = tail; i < altAlignmentsToTest.size(); i++)
|
||||||
altAlignmentsToTest.set(i, false);
|
altAlignmentsToTest.set(i, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue