We're unable to make the naive deduper more worldly, so we're killing it instead.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3587 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
42e7ff4f28
commit
5050b19457
|
|
@ -10,6 +10,5 @@ package org.broadinstitute.sting.gatk;
|
|||
public enum DownsampleType {
|
||||
NONE,
|
||||
ALL_READS,
|
||||
EXPERIMENTAL_BY_SAMPLE,
|
||||
EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR
|
||||
EXPERIMENTAL_BY_SAMPLE
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class DownsamplingMethod {
|
|||
// Do some basic sanity checks on the downsampling parameters passed in.
|
||||
|
||||
// Can't leave toFraction and toCoverage both null unless type is NONE (no downsampling).
|
||||
if(type != DownsampleType.NONE && type != DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR && toFraction == null && toCoverage == null)
|
||||
if(type != DownsampleType.NONE && toFraction == null && toCoverage == null)
|
||||
throw new StingException("Must specify either toFraction or toCoverage when downsampling.");
|
||||
|
||||
// Fraction and coverage cannot both be specified.
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
LocusIterator locusIterator;
|
||||
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
|
||||
if(sourceInfo.getDownsamplingMethod() != null &&
|
||||
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE || sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR)) {
|
||||
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)) {
|
||||
if ( discards.size() > 0 )
|
||||
throw new StingException("Experimental downsampling iterator doesn't support base discarding at this point; complain to Matt Hanna");
|
||||
locusIterator = new DownsamplingLocusIteratorByState(wrappedIterator,sourceInfo);
|
||||
|
|
|
|||
|
|
@ -552,20 +552,10 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
|
||||
private int totalReadStates = 0;
|
||||
|
||||
/**
|
||||
* Store a random number generator with a consistent seed for consistent downsampling from run to run.
|
||||
* Note that each shard will be initialized with the same random seed; this will ensure consistent results
|
||||
* across parallelized runs, at the expense of decreasing our level of randomness.
|
||||
*/
|
||||
private Random downsampleRandomizer = new Random(38148309L);
|
||||
|
||||
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, int maxReadsAtLocus, Collection<String> sampleNames) {
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
this.downsamplingMethod = downsamplingMethod;
|
||||
switch(downsamplingMethod.type) {
|
||||
case EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR:
|
||||
this.targetCoverage = downsamplingMethod.toCoverage != null ? downsamplingMethod.toCoverage : 1;
|
||||
break;
|
||||
case EXPERIMENTAL_BY_SAMPLE:
|
||||
if(downsamplingMethod.toCoverage == null)
|
||||
throw new StingException("Downsampling coverage (-dcov) must be specified when downsampling by sample");
|
||||
|
|
@ -580,12 +570,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
for(String sampleName: sampleNames)
|
||||
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
|
||||
|
||||
ReadSelector primaryReadSelector;
|
||||
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
||||
primaryReadSelector = new NRandomReadSelector(samplePartitioner,targetCoverage);
|
||||
}
|
||||
else
|
||||
primaryReadSelector = samplePartitioner;
|
||||
ReadSelector primaryReadSelector= samplePartitioner;
|
||||
|
||||
chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
|
||||
}
|
||||
|
|
@ -660,7 +645,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName);
|
||||
int numReads = statesBySample.size();
|
||||
|
||||
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
||||
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) {
|
||||
long readLimit = aggregator.getNumReadsSeen();
|
||||
boolean mrlViolation = false;
|
||||
if(readLimit > maxReadsAtLocus-totalReadStates) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue