We're unable to make the naive deduper more worldly, so we're killing it instead.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3587 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-06-18 13:54:27 +00:00
parent 42e7ff4f28
commit 5050b19457
4 changed files with 5 additions and 21 deletions

View File

@ -10,6 +10,5 @@ package org.broadinstitute.sting.gatk;
public enum DownsampleType {
NONE,
ALL_READS,
EXPERIMENTAL_BY_SAMPLE,
EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR
EXPERIMENTAL_BY_SAMPLE
}

View File

@ -29,7 +29,7 @@ public class DownsamplingMethod {
// Do some basic sanity checks on the downsampling parameters passed in.
// Can't leave both toFraction and toCoverage null unless downsampling is disabled (type is NONE).
if(type != DownsampleType.NONE && type != DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR && toFraction == null && toCoverage == null)
if(type != DownsampleType.NONE && toFraction == null && toCoverage == null)
throw new StingException("Must specify either toFraction or toCoverage when downsampling.");
// Fraction and coverage cannot both be specified.

View File

@ -69,7 +69,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
LocusIterator locusIterator;
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
if(sourceInfo.getDownsamplingMethod() != null &&
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE || sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR)) {
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)) {
if ( discards.size() > 0 )
throw new StingException("Experimental downsampling iterator doesn't support base discarding at this point; complain to Matt Hanna");
locusIterator = new DownsamplingLocusIteratorByState(wrappedIterator,sourceInfo);

View File

@ -552,20 +552,10 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
private int totalReadStates = 0;
/**
* Store a random number generator with a consistent seed for consistent downsampling from run to run.
* Note that each shard will be initialized with the same random seed; this will ensure consistent results
* across parallelized runs, at the expense of decreasing our level of randomness.
*/
private Random downsampleRandomizer = new Random(38148309L);
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, int maxReadsAtLocus, Collection<String> sampleNames) {
this.iterator = new PeekableIterator<SAMRecord>(source);
this.downsamplingMethod = downsamplingMethod;
switch(downsamplingMethod.type) {
case EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR:
this.targetCoverage = downsamplingMethod.toCoverage != null ? downsamplingMethod.toCoverage : 1;
break;
case EXPERIMENTAL_BY_SAMPLE:
if(downsamplingMethod.toCoverage == null)
throw new StingException("Downsampling coverage (-dcov) must be specified when downsampling by sample");
@ -580,12 +570,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
for(String sampleName: sampleNames)
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
ReadSelector primaryReadSelector;
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
primaryReadSelector = new NRandomReadSelector(samplePartitioner,targetCoverage);
}
else
primaryReadSelector = samplePartitioner;
ReadSelector primaryReadSelector= samplePartitioner;
chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
}
@ -660,7 +645,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName);
int numReads = statesBySample.size();
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) {
long readLimit = aggregator.getNumReadsSeen();
boolean mrlViolation = false;
if(readLimit > maxReadsAtLocus-totalReadStates) {