Eliminated -mrl option.

Eliminated -fmq0 option.
Eliminated read group hallucination.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4133 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-08-26 21:38:03 +00:00
parent 7671502e1b
commit d773b3264b
19 changed files with 68 additions and 546 deletions

View File

@ -10,5 +10,5 @@ package org.broadinstitute.sting.gatk;
public enum DownsampleType {
NONE,
ALL_READS,
EXPERIMENTAL_BY_SAMPLE
BY_SAMPLE
}

View File

@ -25,6 +25,11 @@ public class DownsamplingMethod {
*/
public final Double toFraction;
/**
* Expresses no downsampling applied at all.
*/
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null);
public DownsamplingMethod(DownsampleType type, Integer toCoverage, Double toFraction) {
// Do some basic sanity checks on the downsampling parameters passed in.
@ -37,7 +42,7 @@ public class DownsamplingMethod {
throw new StingException("Downsampling coverage and fraction are both specified. Please choose only one.");
// Experimental by sample downsampling does not work with a fraction of reads.
if(type == DownsampleType.EXPERIMENTAL_BY_SAMPLE && toFraction != null)
if(type == DownsampleType.BY_SAMPLE && toFraction != null)
throw new StingException("Cannot downsample to fraction with new EXPERIMENTAL_BY_SAMPLE method");
this.type = type;

View File

@ -337,8 +337,6 @@ public class GenomeAnalysisEngine {
protected Collection<SamRecordFilter> createFiltersForWalker(GATKArgumentCollection args, Walker walker) {
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
if (args.filterZeroMappingQualityReads != null && args.filterZeroMappingQualityReads)
filters.add(new ZeroMappingQualityReadFilter());
if (args.readGroupBlackList != null && args.readGroupBlackList.size() > 0)
filters.add(new ReadGroupBlackListFilter(args.readGroupBlackList));
for(String filterName: args.readFilters)
@ -568,12 +566,12 @@ public class GenomeAnalysisEngine {
private ReadProperties extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
DownsamplingMethod method = null;
if(argCollection.downsamplingType != DownsampleType.NONE)
method = new DownsamplingMethod(argCollection.downsamplingType,argCollection.downsampleCoverage,argCollection.downsampleFraction);
if(argCollection.getDownsamplingMethod() != null)
method = argCollection.getDownsamplingMethod();
else if(WalkerManager.getDownsamplingMethod(walker) != null)
method = WalkerManager.getDownsamplingMethod(walker);
else
method = new DownsamplingMethod(DownsampleType.NONE,null,null);
method = argCollection.getDefaultDownsamplingMethod();
return new ReadProperties(unpackBAMFileList(argCollection.samFiles),
argCollection.strictnessLevel,
@ -581,7 +579,6 @@ public class GenomeAnalysisEngine {
method,
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
filters,
argCollection.readMaxPileup,
walker.includeReadsWithDeletionAtLoci(),
walker.generateExtendedEvents());
}

View File

@ -32,7 +32,6 @@ public class ReadProperties {
private DownsamplingMethod downsamplingMethod = null;
private ValidationExclusion exclusionList = null;
private Collection<SamRecordFilter> supplementalFilters = null;
protected int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
private boolean includeReadsWithDeletionAtLoci = false;
private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels)
// immediately after the reference base such event is associated with?
@ -91,14 +90,6 @@ public class ReadProperties {
return downsamplingMethod;
}
/**
* get the maximum number of reads we allow at a locus for locus-by-hanger
* @return the maximum reads allowed in a pile-up
*/
public Integer getMaxReadsAtLocus() {
return maximumReadsAtLocus;
}
/**
* Return whether to 'verify' the reads as we pass through them.
* @return Whether to verify the reads.
@ -117,7 +108,7 @@ public class ReadProperties {
*/
public ReadProperties( List<SAMReaderID> readsFiles ) {
this.readers = readsFiles;
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.NONE,null,null);
this.downsamplingMethod = DownsamplingMethod.NONE;
this.supplementalFilters = new ArrayList<SamRecordFilter>();
this.exclusionList = new ValidationExclusion();
}
@ -144,7 +135,6 @@ public class ReadProperties {
DownsamplingMethod downsamplingMethod,
ValidationExclusion exclusionList,
Collection<SamRecordFilter> supplementalFilters,
int maximumReadsAtLocus,
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents) {
this.readers = samFiles;
@ -153,7 +143,6 @@ public class ReadProperties {
this.downsamplingMethod = downsamplingMethod;
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
this.supplementalFilters = supplementalFilters;
this.maximumReadsAtLocus = maximumReadsAtLocus;
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
this.generateExtendedEvents = generateExtendedEvents;
}

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.simpleframework.xml.*;
@ -113,13 +114,18 @@ public class GATKArgumentCollection {
@Input(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
public String HAPMAPChipFile = null;
@Element(required = false)
@Argument(fullName = "filterZeroMappingQualityReads", shortName = "fmq0", doc = "If true, mapping quality zero reads will be filtered at the lowest GATK level. Vastly improves performance at areas with abnormal depth due to mapping Q0 reads", required = false)
public Boolean filterZeroMappingQualityReads = false;
/**
* The override mechanism in the GATK, by default, populates the command-line arguments, then
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
* by a user explicitly specifying command-line arguments.
* TODO: Change the GATK so that walker defaults are loaded first, then command-line arguments.
*/
private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000;
@Element(required = false)
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here.", required = false)
public DownsampleType downsamplingType = DownsampleType.NONE;
public DownsampleType downsamplingType = null;
@Element(required = false)
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
@ -129,6 +135,26 @@ public class GATKArgumentCollection {
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus (use max_reads_at_locus to stop the engine from reading in all reads)", required = false)
public Integer downsampleCoverage = null;
/**
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
* a default downsampling mechanism, return null.
* @return The explicitly specified downsampling mechanism, or null if none exists.
*/
public DownsamplingMethod getDownsamplingMethod() {
if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null)
return null;
return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
}
/**
* Gets the default downsampling method, returned if the user didn't specify any downsampling
* method.
* @return The default downsampling mechanism, or null if none exists.
*/
public DownsamplingMethod getDefaultDownsamplingMethod() {
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
}
@Element(required = false)
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
public Boolean useOriginalBaseQualities = false;
@ -141,10 +167,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
public ValidationExclusion.TYPE unsafe;
@Element(required = false)
@Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus; use this argument if you are running into memory issues resulting from too many reads piled up at a given locus (but use downsample_to_coverage instead if you are trying to downsample); int.MAX_VALUE by default.", required = false)
public int readMaxPileup = Integer.MAX_VALUE;
/** How many threads should be allocated to this analysis. */
@Element(required = false)
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
@ -295,13 +317,6 @@ public class GATKArgumentCollection {
if (!other.unsafe.equals(this.unsafe)) {
return false;
}
if (other.readMaxPileup != this.readMaxPileup) {
return false;
}
if ((other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null) ||
(other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads))) {
return false;
}
if ((other.downsampleFraction == null && this.downsampleFraction != null) ||
(other.downsampleFraction != null && !other.downsampleFraction.equals(this.downsampleFraction))) {
return false;

View File

@ -30,8 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import java.util.*;
@ -47,7 +46,6 @@ import java.util.*;
public class AlignmentContext {
protected GenomeLoc loc = null;
protected ReadBackedPileup basePileup = null;
private LocusOverflowTracker tracker;
/**
* The number of bases we've skipped over in the reference since the last map invocation.
@ -191,21 +189,4 @@ public class AlignmentContext {
public long getSkippedBases() {
return skippedBases;
}
/**
* a method for injecting the current locus overflow tracker into the alignment context.
* @param state
*/
public void setLocusOverflowTracker(LocusOverflowTracker state) {
this.tracker = state;
}
/**
* have we exceeded the maximum pileup at the current locus?
* @return true if we have, false otherwise
*/
public boolean hasExceededMaxPileup() {
if (this.tracker == null) return false;
return this.tracker.inDroppedRegion(getLocation());
}
}

View File

@ -6,7 +6,6 @@ import java.util.Collections;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMRecord;
/**
@ -91,12 +90,6 @@ public class AllLocusView extends LocusView {
return currentLocus;
}
@Override
public LocusOverflowTracker getLocusOverflowTracker() {
// we don't hold a overflow tracker
return null;
}
/**
* Creates a blank locus context at the specified location.
* @param site Site at which to create the blank locus context.

View File

@ -1,9 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.apache.log4j.Logger;
/**
* User: hanna
* Date: May 12, 2009
@ -38,10 +35,4 @@ public class CoveredLocusView extends LocusView {
public AlignmentContext next() {
return nextLocus();
}
@Override
public LocusOverflowTracker getLocusOverflowTracker() {
// we don't store a tracker
return null;
}
}

View File

@ -126,9 +126,6 @@ public abstract class LocusView extends LocusIterator implements View {
if( sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null )
current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
if ((this.loci != null))
current.setLocusOverflowTracker(loci.getLocusOverflowTracker());
return current;
}

View File

@ -34,7 +34,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.MergingIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*;
@ -139,11 +138,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
}
public LocusOverflowTracker getLocusOverflowTracker() {
// we don't have an overflow tracker
return null;
}
private RefMetaDataTracker createTracker( Collection<RODRecordList> allTracksHere ) {
RefMetaDataTracker t = new RefMetaDataTracker();
for ( RODRecordList track : allTracksHere ) {

View File

@ -560,14 +560,6 @@ public class SAMDataSource implements SimpleDataSource {
final SAMFileHeader header = reader.getFileHeader();
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
if (reader.getFileHeader().getReadGroups().size() < 1) {
SAMReadGroupRecord rec = new SAMReadGroupRecord(readerID.samFile.getName());
rec.setLibrary(readerID.samFile.getName());
rec.setSample(readerID.samFile.getName());
reader.getFileHeader().addReadGroup(rec);
}
readers.put(readerID,reader);
}
}

View File

@ -29,11 +29,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
*/
private final StingSAMIterator readIterator;
/**
* The locus overflow tracker.
*/
private final LocusOverflowTracker locusOverflowTracker;
/**
* The data source for reads. Will probably come directly from the BAM file.
*/
@ -62,8 +57,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,discards);
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
@ -122,10 +115,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
return sourceIterator.next();
}
public LocusOverflowTracker getLocusOverflowTracker() {
return locusOverflowTracker;
}
public void seedNextLocus() {
// locus == null when doing monolithic sharding.
while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus))

View File

@ -29,12 +29,4 @@ public abstract class LocusIterator implements Iterable<AlignmentContext>, Close
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
/**
* a method for getting the overflow tracker, which is used to track sites at which the read count exceeds the
* pile-up threshold set on the command line
*
* @return the overflow tracker, null if no tracker exists
*/
public abstract LocusOverflowTracker getLocusOverflowTracker();
}

View File

@ -50,12 +50,6 @@ public class LocusIteratorByState extends LocusIterator {
//public static final EnumSet<Discard> NO_DISCARDS = EnumSet.noneOf(Discard.class);
public static final List<LocusIteratorFilter> NO_FILTERS = Arrays.asList();
/**
* the overflow tracker, which makes sure we get a limited number of warnings for locus pile-ups that
* exceed the max depth
*/
private LocusOverflowTracker overflowTracker;
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
@ -285,10 +279,9 @@ public class LocusIteratorByState extends LocusIterator {
}
// Add a null sample name as a catch-all for reads without samples
if(!sampleNames.contains(null)) sampleNames.add(null);
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),readInformation.getMaxReadsAtLocus(),sampleNames);
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),sampleNames);
this.readInfo = readInformation;
this.filters = filters;
overflowTracker = new LocusOverflowTracker(readInformation.getMaxReadsAtLocus());
}
public Iterator<AlignmentContext> iterator() {
@ -301,14 +294,8 @@ public class LocusIteratorByState extends LocusIterator {
public boolean hasNext() {
lazyLoadNextAlignmentContext();
boolean r = (nextAlignmentContext != null);
return (nextAlignmentContext != null);
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
// if we don't have a next record, make sure we clean the warning queue
// TODO: Note that this implementation requires that hasNext() always be called before next().
if (!r) overflowTracker.cleanWarningQueue();
return r;
}
public void printState() {
@ -537,22 +524,6 @@ public class LocusIteratorByState extends LocusIterator {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
/**
* a method for setting the overflow tracker, for dependency injection
* @param tracker
*/
protected void setLocusOverflowTracker(LocusOverflowTracker tracker) {
this.overflowTracker = tracker;
}
/**
* a method for getting the overflow tracker
* @return the overflow tracker, null if none exists
*/
public LocusOverflowTracker getLocusOverflowTracker() {
return this.overflowTracker;
}
private class ReadStateManager {
private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod;
@ -563,15 +534,14 @@ public class LocusIteratorByState extends LocusIterator {
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
private final int targetCoverage;
private final int maxReadsAtLocus;
private int totalReadStates = 0;
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, int maxReadsAtLocus, Collection<String> sampleNames) {
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, Collection<String> sampleNames) {
this.iterator = new PeekableIterator<SAMRecord>(source);
this.downsamplingMethod = downsamplingMethod;
switch(downsamplingMethod.type) {
case EXPERIMENTAL_BY_SAMPLE:
case BY_SAMPLE:
if(downsamplingMethod.toCoverage == null)
throw new StingException("Downsampling coverage (-dcov) must be specified when downsampling by sample");
this.targetCoverage = downsamplingMethod.toCoverage;
@ -579,17 +549,16 @@ public class LocusIteratorByState extends LocusIterator {
default:
this.targetCoverage = Integer.MAX_VALUE;
}
this.maxReadsAtLocus = maxReadsAtLocus;
samplePartitioner = new SamplePartitioner(sampleNames);
for(String sampleName: sampleNames)
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
ReadSelector primaryReadSelector = samplePartitioner;
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)
if(downsamplingMethod.type == DownsampleType.BY_SAMPLE)
primaryReadSelector = new NRandomReadSelector(primaryReadSelector,targetCoverage);
chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
chainedReadSelector = primaryReadSelector;
}
public Iterator<SAMRecordState> iteratorForSample(final String sampleName) {
@ -665,12 +634,7 @@ public class LocusIteratorByState extends LocusIterator {
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) {
long readLimit = aggregator.getNumReadsSeen();
boolean mrlViolation = false;
if(readLimit > maxReadsAtLocus-totalReadStates) {
readLimit = maxReadsAtLocus-totalReadStates;
mrlViolation = true;
}
addReadsToSample(statesBySample,newReads,readLimit,mrlViolation);
addReadsToSample(statesBySample,newReads,readLimit);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
}
else {
@ -709,7 +673,7 @@ public class LocusIteratorByState extends LocusIterator {
}
downsamplingExtent = Math.max(downsamplingExtent,statesBySample.purge(toPurge));
addReadsToSample(statesBySample,newReads,targetCoverage-numReads,false);
addReadsToSample(statesBySample,newReads,targetCoverage-numReads);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
}
}
@ -722,15 +686,10 @@ public class LocusIteratorByState extends LocusIterator {
* @param reads Reads to add. Selected reads will be pulled from this source.
* @param maxReads Maximum number of reads to add.
*/
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads, boolean atMaxReadsAtLocusLimit) {
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
if(reads.isEmpty())
return;
GenomeLoc location = null;
// the farthest right a read extends
Integer rightMostEnd = -1;
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
int readCount = 0;
for(SAMRecord read: reads) {
@ -742,17 +701,8 @@ public class LocusIteratorByState extends LocusIterator {
if (state.hadIndel()) hasExtendedEvents = true;
readCount++;
}
if(atMaxReadsAtLocusLimit) {
if (location == null)
location = GenomeLocParser.createGenomeLoc(read);
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
}
}
readStates.addStatesAtNextAlignmentStart(newReadStates);
if (location != null)
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
totalReadStates);
}
private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
@ -940,72 +890,6 @@ interface ReadSelector {
public void reset();
}
/**
* Choose the first N reads from the submitted set.
*/
class FirstNReadSelector implements ReadSelector {
private final ReadSelector chainedSelector;
private final Collection<SAMRecord> selectedReads = new LinkedList<SAMRecord>();
private final long readLimit;
private long readsSeen = 0;
private int downsamplingExtent = 0;
public FirstNReadSelector(ReadSelector chainedSelector, long readLimit) {
this.chainedSelector = chainedSelector;
this.readLimit = readLimit;
}
public void submitRead(SAMRecord read) {
if(readsSeen < readLimit) {
selectedReads.add(read);
if(chainedSelector != null)
chainedSelector.submitRead(read);
}
else
if(chainedSelector != null) {
chainedSelector.notifyReadRejected(read);
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
}
readsSeen++;
}
public void notifyReadRejected(SAMRecord read) {
if(chainedSelector != null)
chainedSelector.notifyReadRejected(read);
readsSeen++;
}
public void complete() {
if(chainedSelector != null)
chainedSelector.complete();
}
public long getNumReadsSeen() {
return readsSeen;
}
public long getNumReadsSelected() {
return selectedReads.size();
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return selectedReads;
}
public void reset() {
selectedReads.clear();
readsSeen = 0;
downsamplingExtent = 0;
if(chainedSelector != null)
chainedSelector.reset();
}
}
/**
* Select N reads randomly from the input stream.
*/

View File

@ -1,113 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
/**
* a helper class that organizes the output of warning messages from read pile-ups that
* are greater than the max pile-up size. We only want a single warning from each non-contiguous
* interval, up until the maximum warning limit.
*
* cleanWarningQueue() must be called when you're finished with the LocusOverflowTracker to make
* sure that no errors are left in the queue.
*
*/
public class LocusOverflowTracker {
// the last interval we emitted a warning for
protected GenomeLoc lastLocation = null;
// the maximum warning count, and the number of warnings emitted
protected static int warningsEmitted = 0;
public static final int MAX_WARNINGS = 100;
// our maximum pileup size
protected final int maxPileupSize;
// do we have a pending warning?
protected boolean warningInQueue = false;
/**
* create a LocusOverflowTracker
*
* @param maxPileup the maximum allowed pile-up size
*/
public LocusOverflowTracker(int maxPileup) {
warningInQueue = false;
maxPileupSize = maxPileup;
}
/**
* have we exceeded the maximum pile-up size?
*
* @param loc the current location
* @param pileupSize the pile-up size
*
* @return return true if we're greater, false if we're not
*/
public boolean exceeded(GenomeLoc loc, int pileupSize) {
boolean exceeded = pileupSize >= maxPileupSize;
if (exceeded) {
// if the last location is null, we're starting a new region that exceeds max_reads_at_locus
if (lastLocation == null) lastLocation = loc;
// are we contiguous to the last genome loc?
else if (lastLocation.contiguousP(loc)) {
lastLocation = lastLocation.merge(loc);
}
// we have an existing region, and the current is not contiguous. Emit the old and store the new
else {
warnUser();
lastLocation = loc;
}
// regardless, we have a warning in the queue
warningInQueue = true;
}
// we don't have a warning at this, but there is one in the queue
else if (warningInQueue) {
warnUser();
lastLocation = null;
}
// return true if we exceeded the max size at this location
return exceeded;
}
/**
* clean up the warning queue, making sure we haven't stored a warning
* that hasn't been emitted yet.
*/
public void cleanWarningQueue() {
if (warningInQueue) warnUser();
}
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
protected void warnUser() {
// make sure we have a warning in the queue
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
// reset the warning light
warningInQueue = false;
// check to see if we've meet our warning threshold or not. If we're equal to the threshold emit a message saying this
// is the last warning they'll see
if (warningsEmitted < MAX_WARNINGS) {
warningsEmitted++;
Utils.warnUser("Unable to add reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation);
} else if (warningsEmitted == MAX_WARNINGS) {
warningsEmitted++;
Utils.warnUser("Unable to add reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation +
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
}
}
/**
* is the specified location in the current exceeded pileup region
* @param position the position
* @return true if we're in that region
*/
public boolean inDroppedRegion(GenomeLoc position) {
if (lastLocation == null || position == null) return false;
return position.overlapsP(lastLocation) ? true : false;
}
}

View File

@ -67,21 +67,7 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists.
*/
public SAMRecord next() {
SAMRecord read = wrappedIterator.next();
// if we don't have a read group, set one.
// TODO: Straw poll to see whether this is really required.
if (read.getAttribute(SAMTag.RG.toString()) == null && read.getFileSource() != null && read.getFileSource().getReader() != null) {
List<SAMReadGroupRecord> readGroups = read.getFileSource().getReader().getFileHeader().getReadGroups();
if (readGroups.size() == 1) {
read.setAttribute(SAMTag.RG.toString(), readGroups.get(0).getReadGroupId());
read.setAttribute(SAMTag.SM.toString(), readGroups.get(0).getReadGroupId());
} else {
logger.warn("Unable to set read group of ungrouped read: unable to pick default group, there are " + readGroups.size() + " possible.");
}
}
return new GATKSAMRecord(read);
return new GATKSAMRecord(wrappedIterator.next());
}
/**

View File

@ -132,57 +132,6 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
}
@Test
public void testBasicWarnings() {
// create a bunch of fake records
List<SAMRecord> records = new ArrayList<SAMRecord>();
for (int x = 1; x < 50; x++)
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20));
// create a test version of the Reads object
ReadProperties reads = new ReadProperties(new ArrayList<SAMReaderID>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
// inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
((LocusIteratorOverride)li.getLocusOverflowTracker()).resetWarningCount();
while (li.hasNext()) {
AlignmentContext context = li.next();
//System.err.println(context.getLocation() + " " + context.getPileup().size());
}
Assert.assertEquals(1, ((LocusIteratorOverride) li.getLocusOverflowTracker()).getWarningCount());
}
@Test
public void testTwoBigPiles() {
// create a bunch of fake records
List<SAMRecord> records = new ArrayList<SAMRecord>();
for (int x = 1; x < 50; x++)
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 20));
for (int x = 1; x < 50; x++)
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20));
// create a test version of the Reads object
ReadProperties reads = new ReadProperties(new ArrayList<SAMReaderID>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
// inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
((LocusIteratorOverride)li.getLocusOverflowTracker()).resetWarningCount();
while (li.hasNext()) {
AlignmentContext context = li.next();
//System.err.println(context.getLocation() + " " + context.getPileup().size());
}
li.getLocusOverflowTracker().cleanWarningQueue();
Assert.assertEquals(2, ((LocusIteratorOverride) li.getLocusOverflowTracker()).getWarningCount());
}
}
class FakeCloseableIterator<T> implements CloseableIterator<T> {

View File

@ -1,118 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
/**
* @author aaron
* <p/>
* Class LocusOverflowTrackerUnitTest
* <p/>
* test out the locus overflow tracker
*/
public class LocusOverflowTrackerUnitTest extends BaseTest {

    /** Tracker under test; replaced by a counting override before each test. */
    private LocusOverflowTracker tracker;

    /** Pileup-depth cap handed to the tracker in every test. */
    private final int MAX_READS = 10;

    private static SAMFileHeader header;

    @BeforeClass
    public static void beforeClass() {
        // Single artificial contig of 1000bp is enough for every test here.
        header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
        GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
    }

    @Before
    public void beforeTest() {
        tracker = new LocusIteratorOverride(MAX_READS);
        // warningsEmitted is static on the parent, so zero it between tests.
        ((LocusIteratorOverride) tracker).resetWarningCount();
    }

    @Test
    public void testLocusOverflow() {
        // exceeded() should fire only once the pileup depth reaches the cap.
        SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 100);
        GenomeLoc loc = GenomeLocParser.createGenomeLoc(rec);
        if (tracker.exceeded(loc, MAX_READS - 1))
            Assert.fail("We shouldn't be exceeded when MAX_READS -1 is the input");
        if (!tracker.exceeded(loc, MAX_READS)) Assert.fail("We should be exceeded when MAX_READS is the input");
        if (!tracker.exceeded(loc, MAX_READS + 1))
            Assert.fail("We shouldn't be exceeded when MAX_READS +1 is the input");
    }

    @Test
    public void testContinuousLocus() {
        // Overlapping overflowing loci should coalesce into a single warning.
        for (int x = 1; x < 5; x++) {
            SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 100);
            GenomeLoc loc = GenomeLocParser.createGenomeLoc(rec);
            tracker.exceeded(loc, MAX_READS + 1);
        }
        tracker.cleanWarningQueue();
        Assert.assertEquals(1, ((LocusIteratorOverride) tracker).getWarningCount());
    }

    @Test
    public void testTwoSeparateContinuousLoci() { // renamed: fixed "Seperate" typo
        // Two disjoint overflowing regions should produce two warnings.
        for (int x = 1; x < 5; x++) {
            SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 2);
            GenomeLoc loc = GenomeLocParser.createGenomeLoc(rec);
            tracker.exceeded(loc, MAX_READS + 1);
        }
        for (int x = 10; x < 15; x++) {
            SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 2);
            GenomeLoc loc = GenomeLocParser.createGenomeLoc(rec);
            tracker.exceeded(loc, MAX_READS + 1);
        }
        tracker.cleanWarningQueue();
        Assert.assertEquals(2, ((LocusIteratorOverride) tracker).getWarningCount());
    }

    @Test
    // make sure we never emit more than the specified number of warnings
    public void testOverflow() {
        // BUG FIX: the loop bound previously read LocusOverflowTracker.warningsEmitted,
        // which beforeTest() resets to zero -- the loop body never executed, and the
        // final assertion compared warningsEmitted to getWarningCount() (which returns
        // warningsEmitted), so the test passed vacuously. Derive the attempt count
        // from the warning cap instead.
        // NOTE(review): assumes MAX_WARNINGS * 3 positions fit inside the 1000bp
        // artificial contig -- TODO confirm against LocusOverflowTracker.
        final int attempts = LocusOverflowTracker.MAX_WARNINGS * 3;
        for (int x = 1; x < attempts; x += 2) {
            SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 100);
            GenomeLoc loc = GenomeLocParser.createGenomeLoc(rec);
            tracker.exceeded(loc, MAX_READS + 1);
        }
        tracker.cleanWarningQueue();
        // NOTE(review): overlapping loci may merge into fewer warning regions, so
        // assert the cap invariant rather than an exact count. warnUser() increments
        // while warningsEmitted <= MAX_WARNINGS, so the ceiling is MAX_WARNINGS + 1.
        Assert.assertTrue("warning count exceeded the emission cap",
                ((LocusIteratorOverride) tracker).getWarningCount() <= LocusOverflowTracker.MAX_WARNINGS + 1);
    }
}
class LocusIteratorOverride extends LocusOverflowTracker {
public int warningsDropped = 0;
public LocusIteratorOverride(int maxPileup) {
super(maxPileup);
}
/** override to count warnings instead of generating output. */
protected void warnUser() {
warningInQueue = false;
if (warningsEmitted <= MAX_WARNINGS)
warningsEmitted++;
else
warningsDropped++;
}
public int getWarningCount() {
return warningsEmitted;
}
public void resetWarningCount() {
LocusOverflowTracker.warningsEmitted = 0;
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010.
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -12,15 +12,14 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers;
@ -47,23 +46,23 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
}
final static String Q10ClipOutput = "b29c5bc1cb9006ed9306d826a11d444f";
@Test public void testQClip0() { testClipper("clipQSum0", "-QT 0", "117a4760b54308f81789c39b1c9de578", "2465660bcd975a1dc6dfbf40a21bf6ad"); }
@Test public void testQClip2() { testClipper("clipQSum2", "-QT 2", Q10ClipOutput, "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testQClip10() { testClipper("clipQSum10", "-QT 10", "b29c5bc1cb9006ed9306d826a11d444f", "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testQClip20() { testClipper("clipQSum20", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "9a4b1c83c026ca83db00bb71999246cf"); }
@Test public void testQClip30() { testClipper("clipQSum30", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "9a4b1c83c026ca83db00bb71999246cf"); }
@Test public void testQClip0() { testClipper("clipQSum0", "-QT 0", "117a4760b54308f81789c39b1c9de578", "4deca83d80dfa3f093e0dc27d27d1352"); }
@Test public void testQClip2() { testClipper("clipQSum2", "-QT 2", Q10ClipOutput, "1e123233ebd2f35ac3f41b1b7d2c8199"); }
@Test public void testQClip10() { testClipper("clipQSum10", "-QT 10", "b29c5bc1cb9006ed9306d826a11d444f", "1e123233ebd2f35ac3f41b1b7d2c8199"); }
@Test public void testQClip20() { testClipper("clipQSum20", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "b950538d2d8fac1bcee11850c452bd6a"); }
@Test public void testQClip30() { testClipper("clipQSum30", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "b950538d2d8fac1bcee11850c452bd6a"); }
@Test public void testClipRange1() { testClipper("clipRange1", "-CT 1-5", "b5acd753226e25b1e088838c1aab9117", "9a08474a13fbb897b7c9dca58d19884f"); }
@Test public void testClipRange2() { testClipper("clipRange2", "-CT 1-5,11-15", "be4fcad5b666a5540028b774169cbad7", "f05ab5fe821b77cd5b066212ff56f8ff"); }
@Test public void testClipRange1() { testClipper("clipRange1", "-CT 1-5", "b5acd753226e25b1e088838c1aab9117", "9f70540b795f227668dcf78edcb35c09"); }
@Test public void testClipRange2() { testClipper("clipRange2", "-CT 1-5,11-15", "be4fcad5b666a5540028b774169cbad7", "a22347a741640fc6df92700e0e8d6f61"); }
@Test public void testClipSeq() { testClipper("clipSeqX", "-X CCCCC", "db199bd06561c9f2122f6ffb07941fbc", "c218c0649838423a06f3296430f65c4f"); }
@Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + validationDataLocation + "seqsToClip.fasta", "d011a3152b31822475afbe0281491f8d", "1151e10833da794203df2ba7cc76d5c5"); }
@Test public void testClipSeq() { testClipper("clipSeqX", "-X CCCCC", "db199bd06561c9f2122f6ffb07941fbc", "f49e9e61a44115e2be59330259966f53"); }
@Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + validationDataLocation + "seqsToClip.fasta", "d011a3152b31822475afbe0281491f8d", "5c977f261442ab6122d5198fa4086e67"); }
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + validationDataLocation + "seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "4a1153d6f0600cf53ff7959a043e57cc"); }
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + validationDataLocation + "seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "38d5f33d198aeee7eebec9feb7b11199"); }
@Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "fb77d3122df468a71e03ca92b69493f4"); }
@Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "24053a87b00c0bc2ddf420975e9fea4d"); }
@Test public void testClipSoft() { testClipper("testClipSoft", "-QT 10 -CR SOFTCLIP_BASES", Q10ClipOutput, "aeb67cca75285a68af8a965faa547e7f"); }
@Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "1e123233ebd2f35ac3f41b1b7d2c8199"); }
@Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "d44cab2e3b70f5492a0f5b59f0b80043"); }
@Test public void testClipSoft() { testClipper("testClipSoft", "-QT 10 -CR SOFTCLIP_BASES", Q10ClipOutput, "b86374a7e6f59e3dd35781e9e8006702"); }
@Test
public void testUseOriginalQuals() {