Experimental versions of downsampler and Ryan's deduper are now available either
as walker attributes or from the command-line. Not ready yet! Downsampling/deduping works in a general sense, but this approach has not been completely optimized or validated. Use with caution. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3392 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
46ba88018d
commit
017ab6b690
|
|
@ -0,0 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
/**
|
||||
* Type of downsampling method to invoke.
|
||||
*
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public enum DownsampleType {
    /** Perform no downsampling at all. */
    NONE,
    /** Downsample the pile at a locus across all reads together, not per sample.
     *  NOTE(review): exact selection semantics not visible here -- confirm against engine code. */
    ALL_READS,
    /** Experimental per-sample downsampling; does not accept a toFraction target
     *  (see DownsamplingMethod constructor validation). */
    EXPERIMENTAL_BY_SAMPLE,
    /** Experimental naive duplicate elimination; requires neither a toCoverage nor a
     *  toFraction target (see DownsamplingMethod constructor validation). */
    EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/**
|
||||
* Describes the method for downsampling reads at a given locus.
|
||||
*
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class DownsamplingMethod {
    /**
     * Type of downsampling to perform.
     */
    public final DownsampleType type;

    /**
     * Actual downsampling target is specified as an integer number of reads.
     * May be null when the target is expressed as a fraction instead.
     */
    public final Integer toCoverage;

    /**
     * Actual downsampling target is specified as a fraction of total available reads.
     * May be null when the target is expressed as a coverage depth instead.
     */
    public final Double toFraction;

    /**
     * Creates and validates a downsampling specification.
     * @param type Kind of downsampling to perform; see DownsampleType.
     * @param toCoverage Per-locus coverage target, or null if unspecified.
     * @param toFraction Fraction of reads to retain, or null if unspecified.
     * @throws StingException if neither target is given for a type that requires one,
     *         if both targets are given, or if a fraction is given for
     *         EXPERIMENTAL_BY_SAMPLE (which only supports coverage targets).
     */
    public DownsamplingMethod(DownsampleType type, Integer toCoverage, Double toFraction) {
        // Do some basic sanity checks on the downsampling parameters passed in.

        // Can't leave toFraction and toCoverage null unless type is experimental naive duplicate eliminator.
        if(type != DownsampleType.NONE && type != DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR && toFraction == null && toCoverage == null)
            throw new StingException("Must specify either toFraction or toCoverage when downsampling.");

        // Fraction and coverage cannot both be specified.
        if(toFraction != null && toCoverage != null)
            throw new StingException("Downsampling coverage and fraction are both specified. Please choose only one.");

        // Experimental by sample downsampling does not work with a fraction of reads.
        if(type == DownsampleType.EXPERIMENTAL_BY_SAMPLE && toFraction != null)
            throw new StingException("Cannot downsample to fraction with new EXPERIMENTAL_BY_SAMPLE method");

        this.type = type;
        this.toCoverage = toCoverage;
        this.toFraction = toFraction;
    }
}
|
||||
|
|
@ -524,10 +524,18 @@ public class GenomeAnalysisEngine {
|
|||
* @return The reads object providing reads source info.
|
||||
*/
|
||||
private Reads extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
|
||||
|
||||
DownsamplingMethod method = null;
|
||||
if(argCollection.downsamplingType != DownsampleType.NONE)
|
||||
method = new DownsamplingMethod(argCollection.downsamplingType,argCollection.downsampleCoverage,argCollection.downsampleFraction);
|
||||
else if(WalkerManager.getDownsamplingMethod(walker) != null)
|
||||
method = WalkerManager.getDownsamplingMethod(walker);
|
||||
else
|
||||
method = new DownsamplingMethod(DownsampleType.NONE,null,null);
|
||||
|
||||
return new Reads(argCollection.samFiles,
|
||||
argCollection.strictnessLevel,
|
||||
argCollection.downsampleFraction,
|
||||
argCollection.downsampleCoverage,
|
||||
method,
|
||||
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
|
||||
filters,
|
||||
argCollection.readMaxPileup,
|
||||
|
|
|
|||
|
|
@ -28,8 +28,7 @@ import java.util.Collection;
|
|||
public class Reads {
|
||||
private List<File> readsFiles = null;
|
||||
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
|
||||
private Double downsamplingFraction = null;
|
||||
private Integer downsampleToCoverage = null;
|
||||
private DownsamplingMethod downsamplingMethod = null;
|
||||
private ValidationExclusion exclusionList = null;
|
||||
private Collection<SamRecordFilter> supplementalFilters = null;
|
||||
protected int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
||||
|
|
@ -76,19 +75,11 @@ public class Reads {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the fraction of reads to downsample.
|
||||
* Gets the method and parameters used when downsampling reads.
|
||||
* @return Downsample fraction.
|
||||
*/
|
||||
public Double getDownsamplingFraction() {
|
||||
return downsamplingFraction;
|
||||
}
|
||||
|
||||
/**
|
||||
* Downsample each locus to the specified coverage.
|
||||
* @return Coverage to which to downsample.
|
||||
*/
|
||||
public Integer getDownsampleToCoverage() {
|
||||
return downsampleToCoverage;
|
||||
public DownsamplingMethod getDownsamplingMethod() {
|
||||
return downsamplingMethod;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,6 +108,7 @@ public class Reads {
|
|||
*/
|
||||
public Reads( List<File> readsFiles ) {
|
||||
this.readsFiles = readsFiles;
|
||||
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.NONE,null,null);
|
||||
this.supplementalFilters = new ArrayList<SamRecordFilter>();
|
||||
this.exclusionList = new ValidationExclusion();
|
||||
}
|
||||
|
|
@ -127,8 +119,6 @@ public class Reads {
|
|||
* is package protected.
|
||||
* @param samFiles list of reads files.
|
||||
* @param strictness Stringency of reads file parsing.
|
||||
* @param downsampleFraction fraction of reads to downsample.
|
||||
* @param downsampleCoverage downsampling per-locus.
|
||||
* @param exclusionList what safety checks we're willing to let slide
|
||||
* @param supplementalFilters additional filters to dynamically apply.
|
||||
* @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with
|
||||
|
|
@ -140,8 +130,7 @@ public class Reads {
|
|||
*/
|
||||
Reads( List<File> samFiles,
|
||||
SAMFileReader.ValidationStringency strictness,
|
||||
Double downsampleFraction,
|
||||
Integer downsampleCoverage,
|
||||
DownsamplingMethod downsamplingMethod,
|
||||
ValidationExclusion exclusionList,
|
||||
Collection<SamRecordFilter> supplementalFilters,
|
||||
int maximumReadsAtLocus,
|
||||
|
|
@ -149,8 +138,7 @@ public class Reads {
|
|||
boolean generateExtendedEvents) {
|
||||
this.readsFiles = samFiles;
|
||||
this.validationStringency = strictness;
|
||||
this.downsamplingFraction = downsampleFraction;
|
||||
this.downsampleToCoverage = downsampleCoverage;
|
||||
this.downsamplingMethod = downsamplingMethod;
|
||||
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
|
||||
this.supplementalFilters = supplementalFilters;
|
||||
this.maximumReadsAtLocus = maximumReadsAtLocus;
|
||||
|
|
|
|||
|
|
@ -238,6 +238,27 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
return filters;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the type of downsampling method requested by the walker. If an alternative
|
||||
* downsampling method is specified on the command-line, the command-line version will
|
||||
* be used instead.
|
||||
* @param walker The walker to interrogate.
|
||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||
*/
|
||||
public static DownsamplingMethod getDownsamplingMethod(Walker walker) {
|
||||
DownsamplingMethod downsamplingMethod = null;
|
||||
|
||||
if( walker.getClass().isAnnotationPresent(Downsample.class) ) {
|
||||
Downsample downsampleParameters = walker.getClass().getAnnotation(Downsample.class);
|
||||
DownsampleType type = downsampleParameters.by();
|
||||
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
|
||||
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
|
||||
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction);
|
||||
}
|
||||
|
||||
return downsamplingMethod;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a name for this type of walker.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import net.sf.samtools.SAMFileReader;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.simpleframework.xml.*;
|
||||
import org.simpleframework.xml.core.Persister;
|
||||
import org.simpleframework.xml.stream.Format;
|
||||
|
|
@ -124,6 +125,10 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "filterZeroMappingQualityReads", shortName = "fmq0", doc = "If true, mapping quality zero reads will be filtered at the lowest GATK level. Vastly improves performance at areas with abnormal depth due to mapping Q0 reads", required = false)
|
||||
public Boolean filterZeroMappingQualityReads = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here.", required = false)
|
||||
public DownsampleType downsamplingType = DownsampleType.NONE;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
|
||||
public Double downsampleFraction = null;
|
||||
|
|
@ -148,13 +153,9 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus; use this argument if you are running into memory issues resulting from too many reads piled up at a given locus (but use downsample_to_coverage instead if you are trying to downsample); int.MAX_VALUE by default.", required = false)
|
||||
public int readMaxPileup = Integer.MAX_VALUE;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
||||
public Boolean disableThreading = false;
|
||||
|
||||
/** How many threads should be allocated to this analysis. */
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
public int numberOfThreads = 1;
|
||||
|
||||
/** What rule should we use when merging intervals */
|
||||
|
|
|
|||
|
|
@ -127,8 +127,8 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
|
||||
// Find the next.
|
||||
seedNextLocus();
|
||||
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||
if( sourceInfo.getDownsamplingMethod().toCoverage != null )
|
||||
current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
|
||||
|
||||
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
||||
if ((this.loci != null))
|
||||
|
|
|
|||
|
|
@ -261,7 +261,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
|
||||
return applyDecoratingIterators(enableVerification,
|
||||
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(reads,mergingIterator)),
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
}
|
||||
|
|
@ -282,7 +282,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
|
||||
return applyDecoratingIterators(shard instanceof ReadShard,
|
||||
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(reads,mergingIterator)),
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -165,14 +165,14 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
iterator = seekRead(shard);
|
||||
iterator = applyDecoratingIterators(true,
|
||||
iterator,
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
} else if (shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
iterator = seekLocus(shard);
|
||||
iterator = applyDecoratingIterators(false,
|
||||
iterator,
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
} else if ((shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) ||
|
||||
|
|
@ -180,7 +180,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
iterator = seekLocus(shard);
|
||||
iterator = applyDecoratingIterators(false,
|
||||
iterator,
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
|
|
@ -57,15 +55,21 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
|
||||
/**
|
||||
* Create a new window maker with the given iterator as a data source, covering
|
||||
* the given inteervals.
|
||||
* the given intervals.
|
||||
* @param iterator The data source for this window.
|
||||
* @param intervals The set of intervals over which to traverse.
|
||||
*/
|
||||
public WindowMaker(StingSAMIterator iterator, List<GenomeLoc> intervals) {
|
||||
this.sourceInfo = iterator.getSourceInfo();
|
||||
this.readIterator = iterator;
|
||||
|
||||
LocusIterator locusIterator = new LocusIteratorByState(new FilteringIterator(iterator,new LocusStreamFilterFunc()),sourceInfo);
|
||||
|
||||
LocusIterator locusIterator;
|
||||
if(sourceInfo.getDownsamplingMethod() != null &&
|
||||
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE || sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR))
|
||||
locusIterator = new DownsamplingLocusIteratorByState(new FilteringIterator(iterator,new LocusStreamFilterFunc()),sourceInfo);
|
||||
else
|
||||
locusIterator = new LocusIteratorByState(new FilteringIterator(iterator,new LocusStreamFilterFunc()),sourceInfo);
|
||||
|
||||
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
|
||||
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ import net.sf.picard.util.PeekableIterator;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -259,7 +261,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
// TODO: Push in header via constructor
|
||||
if(GenomeAnalysisEngine.instance.getDataSource() != null)
|
||||
sampleNames.addAll(SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader()));
|
||||
readStates = new ReadStateManager(samIterator,sampleNames,readInformation.getMaxReadsAtLocus());
|
||||
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),sampleNames);
|
||||
this.readInfo = readInformation;
|
||||
|
||||
}
|
||||
|
|
@ -507,8 +509,10 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
|
||||
private class ReadStateManager implements Iterable<SAMRecordState> {
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
|
||||
private final Map<String,ReservoirDownsampler<SAMRecord>> downsamplersBySampleName = new HashMap<String,ReservoirDownsampler<SAMRecord>>();
|
||||
private final int maxReadsPerSample;
|
||||
private final int targetCoverage;
|
||||
|
||||
private final Deque<Map<String,List<SAMRecordState>>> readStatesByAlignmentStart;
|
||||
|
||||
|
|
@ -519,11 +523,16 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
*/
|
||||
private Random downsampleRandomizer = new Random(38148309L);
|
||||
|
||||
public ReadStateManager(Iterator<SAMRecord> source, Collection<String> sampleNames, int maxReadsPerSample) {
|
||||
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, Collection<String> sampleNames) {
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
this.maxReadsPerSample = maxReadsPerSample;
|
||||
for(String sampleName: sampleNames)
|
||||
downsamplersBySampleName.put(sampleName,new ReservoirDownsampler<SAMRecord>(maxReadsPerSample));
|
||||
this.downsamplingMethod = downsamplingMethod;
|
||||
this.targetCoverage = downsamplingMethod.toCoverage != null ? downsamplingMethod.toCoverage : 1;
|
||||
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR)
|
||||
downsamplersBySampleName.put(null,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
||||
else {
|
||||
for(String sampleName: sampleNames)
|
||||
downsamplersBySampleName.put(sampleName,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
||||
}
|
||||
this.readStatesByAlignmentStart = new LinkedList<Map<String,List<SAMRecordState>>>();
|
||||
}
|
||||
|
||||
|
|
@ -606,9 +615,23 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
public void collectPendingReads() {
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
SAMRecord read = iterator.next();
|
||||
downsamplersBySampleName.get(read.getReadGroup().getSample()).add(read);
|
||||
if(iterator.hasNext() && readStates.size() == 0) {
|
||||
int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||
while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
SAMRecord read = iterator.next();
|
||||
getDownsampler(read.getReadGroup().getSample()).add(read);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Fast fail in the case that the read is past the current position.
|
||||
if(iterator.hasNext() && readIsPastCurrentPosition(iterator.peek()))
|
||||
return;
|
||||
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
SAMRecord read = iterator.next();
|
||||
getDownsampler(read.getReadGroup().getSample()).add(read);
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,List<SAMRecordState>> culledReadStatesBySample = new HashMap<String,List<SAMRecordState>>();
|
||||
|
|
@ -621,14 +644,14 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
downsampler.clear();
|
||||
int readsInHanger = countReadsInHanger(sampleName);
|
||||
|
||||
if(readsInHanger+newReads.size() <= maxReadsPerSample)
|
||||
if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR)
|
||||
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,newReads.size());
|
||||
else {
|
||||
Iterator<Map<String,List<SAMRecordState>>> backIterator = readStatesByAlignmentStart.descendingIterator();
|
||||
boolean readPruned = true;
|
||||
while(readsInHanger+newReads.size()>maxReadsPerSample && readPruned) {
|
||||
while(readsInHanger+newReads.size()>targetCoverage && readPruned) {
|
||||
readPruned = false;
|
||||
while(readsInHanger+newReads.size()>maxReadsPerSample && backIterator.hasNext()) {
|
||||
while(readsInHanger+newReads.size()>targetCoverage && backIterator.hasNext()) {
|
||||
List<SAMRecordState> readsAtLocus = backIterator.next().get(sampleName);
|
||||
if(readsAtLocus.size() > 1) {
|
||||
readsAtLocus.remove(downsampleRandomizer.nextInt(readsAtLocus.size()));
|
||||
|
|
@ -638,65 +661,24 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
if(readsInHanger == maxReadsPerSample) {
|
||||
if(readsInHanger == targetCoverage) {
|
||||
Collection<SAMRecordState> firstHangerForSample = readStatesByAlignmentStart.getFirst().get(sampleName);
|
||||
readsInHanger -= firstHangerForSample.size();
|
||||
firstHangerForSample.clear();
|
||||
}
|
||||
|
||||
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,maxReadsPerSample-readsInHanger);
|
||||
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,targetCoverage-readsInHanger);
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(culledReadStatesBySample);
|
||||
}
|
||||
}
|
||||
|
||||
/* else {
|
||||
if() {
|
||||
// Consume the collection of reads.
|
||||
downsamplingIterator.next();
|
||||
|
||||
Map<String,Collection<SAMRecord>> newReadsBySample = new HashMap<String,Collection<SAMRecord>>();
|
||||
Map<String,List<SAMRecordState>> culledReadStatesBySample = new HashMap<String,List<SAMRecordState>>();
|
||||
|
||||
for(String sampleName: sampleNames)
|
||||
newReadsBySample.put(sampleName,getReadsForGivenSample(reads,sampleName));
|
||||
|
||||
for(String sampleName: newReadsBySample.keySet()) {
|
||||
Collection<SAMRecord> newReads = newReadsBySample.get(sampleName);
|
||||
int readsInHanger = countReadsInHanger(sampleName);
|
||||
|
||||
//if(readsInHanger+newReads.size() <= maxReadsPerSample)
|
||||
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,newReads.size());
|
||||
Iterator<Map<String,List<SAMRecordState>>> backIterator = readStatesByAlignmentStart.descendingIterator();
|
||||
boolean readPruned = true;
|
||||
while(readsInHanger+newReads.size()>maxReadsPerSample && readPruned) {
|
||||
readPruned = false;
|
||||
while(readsInHanger+newReads.size()>maxReadsPerSample && backIterator.hasNext()) {
|
||||
List<SAMRecordState> readsAtLocus = backIterator.next().get(sampleName);
|
||||
if(readsAtLocus.size() > 1) {
|
||||
readsAtLocus.remove(downsampleRandomizer.nextInt(readsAtLocus.size()));
|
||||
readPruned = true;
|
||||
readsInHanger--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(readsInHanger == maxReadsPerSample) {
|
||||
Collection<SAMRecordState> firstHangerForSample = readStatesByAlignmentStart.getFirst().get(sampleName);
|
||||
readsInHanger -= firstHangerForSample.size();
|
||||
firstHangerForSample.clear();
|
||||
}
|
||||
|
||||
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,maxReadsPerSample-readsInHanger);
|
||||
}
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(culledReadStatesBySample);
|
||||
}
|
||||
else if(readIsPastCurrentPosition(reads.iterator().next()))
|
||||
break;
|
||||
}
|
||||
*/
|
||||
private ReservoirDownsampler<SAMRecord> getDownsampler(String sampleName) {
|
||||
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR)
|
||||
return downsamplersBySampleName.get(null);
|
||||
else
|
||||
return downsamplersBySampleName.get(sampleName);
|
||||
}
|
||||
|
||||
private int countReadsInHanger() {
|
||||
|
|
@ -737,9 +719,10 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
Iterator<Map<String,List<SAMRecordState>>> hangerIterator = readStatesByAlignmentStart.iterator();
|
||||
while(hangerIterator.hasNext()) {
|
||||
Map<String,List<SAMRecordState>> hangerEntry = hangerIterator.next();
|
||||
for(String sampleName: sampleNames) {
|
||||
if(hangerEntry.containsKey(sampleName) && hangerEntry.get(sampleName).size() == 0)
|
||||
hangerEntry.remove(sampleName);
|
||||
Iterator<List<SAMRecordState>> entryBySampleIterator = hangerEntry.values().iterator();
|
||||
while(entryBySampleIterator.hasNext()) {
|
||||
if(entryBySampleIterator.next().size() == 0)
|
||||
entryBySampleIterator.remove();
|
||||
}
|
||||
if(hangerEntry.size() == 0)
|
||||
hangerIterator.remove();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* Specifies a method for downsampling the reads passed to a given
|
||||
* walker based on the input from that walker.
|
||||
*
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface Downsample {
|
||||
DownsampleType by();
|
||||
int toCoverage() default -1;
|
||||
double toFraction() default -1.0F;
|
||||
}
|
||||
|
|
@ -73,7 +73,7 @@ public class ReservoirDownsampler<T> implements Collection<T> {
|
|||
* @return The downsampled contents of this reservoir.
|
||||
*/
|
||||
public Collection<T> getDownsampledContents() {
|
||||
return Collections.unmodifiableCollection(reservoir);
|
||||
return (Collection<T>)reservoir.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -92,7 +92,6 @@ public class GATKArgumentCollectionUnitTest extends BaseTest {
|
|||
collect.intervals = new ArrayList<String>();
|
||||
collect.intervals.add("intervals".toLowerCase());
|
||||
collect.excludeIntervals = new ArrayList<String>();
|
||||
collect.disableThreading = false;
|
||||
collect.outFileName = "outFileName".toLowerCase();
|
||||
collect.errFileName = "errFileName".toLowerCase();
|
||||
collect.outErrFileName = "outErrFileName".toLowerCase();
|
||||
|
|
|
|||
Loading…
Reference in New Issue