We're unable to make the naive deduper more worldly, so we're killing it instead.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3587 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
42e7ff4f28
commit
5050b19457
|
|
@ -10,6 +10,5 @@ package org.broadinstitute.sting.gatk;
|
||||||
public enum DownsampleType {
|
public enum DownsampleType {
|
||||||
NONE,
|
NONE,
|
||||||
ALL_READS,
|
ALL_READS,
|
||||||
EXPERIMENTAL_BY_SAMPLE,
|
EXPERIMENTAL_BY_SAMPLE
|
||||||
EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ public class DownsamplingMethod {
|
||||||
// Do some basic sanity checks on the downsampling parameters passed in.
|
// Do some basic sanity checks on the downsampling parameters passed in.
|
||||||
|
|
||||||
// Can't leave toFraction and toCoverage null unless type is experimental naive duplicate eliminator.
|
// Can't leave both toFraction and toCoverage null unless the downsampling type is NONE.
|
||||||
if(type != DownsampleType.NONE && type != DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR && toFraction == null && toCoverage == null)
|
if(type != DownsampleType.NONE && toFraction == null && toCoverage == null)
|
||||||
throw new StingException("Must specify either toFraction or toCoverage when downsampling.");
|
throw new StingException("Must specify either toFraction or toCoverage when downsampling.");
|
||||||
|
|
||||||
// Fraction and coverage cannot both be specified.
|
// Fraction and coverage cannot both be specified.
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
||||||
LocusIterator locusIterator;
|
LocusIterator locusIterator;
|
||||||
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
|
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
|
||||||
if(sourceInfo.getDownsamplingMethod() != null &&
|
if(sourceInfo.getDownsamplingMethod() != null &&
|
||||||
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE || sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR)) {
|
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)) {
|
||||||
if ( discards.size() > 0 )
|
if ( discards.size() > 0 )
|
||||||
throw new StingException("Experimental downsampling iterator doesn't support base discarding at this point; complain to Matt Hanna");
|
throw new StingException("Experimental downsampling iterator doesn't support base discarding at this point; complain to Matt Hanna");
|
||||||
locusIterator = new DownsamplingLocusIteratorByState(wrappedIterator,sourceInfo);
|
locusIterator = new DownsamplingLocusIteratorByState(wrappedIterator,sourceInfo);
|
||||||
|
|
|
||||||
|
|
@ -552,20 +552,10 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
|
|
||||||
private int totalReadStates = 0;
|
private int totalReadStates = 0;
|
||||||
|
|
||||||
/**
|
|
||||||
* Store a random number generator with a consistent seed for consistent downsampling from run to run.
|
|
||||||
* Note that each shard will be initialized with the same random seed; this will ensure consistent results
|
|
||||||
* across parallelized runs, at the expense of decreasing our level of randomness.
|
|
||||||
*/
|
|
||||||
private Random downsampleRandomizer = new Random(38148309L);
|
|
||||||
|
|
||||||
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, int maxReadsAtLocus, Collection<String> sampleNames) {
|
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, int maxReadsAtLocus, Collection<String> sampleNames) {
|
||||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||||
this.downsamplingMethod = downsamplingMethod;
|
this.downsamplingMethod = downsamplingMethod;
|
||||||
switch(downsamplingMethod.type) {
|
switch(downsamplingMethod.type) {
|
||||||
case EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR:
|
|
||||||
this.targetCoverage = downsamplingMethod.toCoverage != null ? downsamplingMethod.toCoverage : 1;
|
|
||||||
break;
|
|
||||||
case EXPERIMENTAL_BY_SAMPLE:
|
case EXPERIMENTAL_BY_SAMPLE:
|
||||||
if(downsamplingMethod.toCoverage == null)
|
if(downsamplingMethod.toCoverage == null)
|
||||||
throw new StingException("Downsampling coverage (-dcov) must be specified when downsampling by sample");
|
throw new StingException("Downsampling coverage (-dcov) must be specified when downsampling by sample");
|
||||||
|
|
@ -580,12 +570,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
for(String sampleName: sampleNames)
|
for(String sampleName: sampleNames)
|
||||||
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
|
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
|
||||||
|
|
||||||
ReadSelector primaryReadSelector;
|
ReadSelector primaryReadSelector= samplePartitioner;
|
||||||
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
|
||||||
primaryReadSelector = new NRandomReadSelector(samplePartitioner,targetCoverage);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
primaryReadSelector = samplePartitioner;
|
|
||||||
|
|
||||||
chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
|
chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
|
||||||
}
|
}
|
||||||
|
|
@ -660,7 +645,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName);
|
PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName);
|
||||||
int numReads = statesBySample.size();
|
int numReads = statesBySample.size();
|
||||||
|
|
||||||
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) {
|
||||||
long readLimit = aggregator.getNumReadsSeen();
|
long readLimit = aggregator.getNumReadsSeen();
|
||||||
boolean mrlViolation = false;
|
boolean mrlViolation = false;
|
||||||
if(readLimit > maxReadsAtLocus-totalReadStates) {
|
if(readLimit > maxReadsAtLocus-totalReadStates) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue