Massive cleanup of read filtering.

- Eliminate redundancy of filter application.
- Track filter metrics per-shard to facilitate later merging.
- Flatten counting iterator hierarchy for easier debugging.
- Rename Reads class to ReadProperties and track it outside of the Sting iterators.
Note: because shards are currently tied so closely to reads and not the merged triplet of <reads,ref,RODs>, the metrics
classes are managed by the SAMDataSource when they should be managed by something more general.  For now, we're hacking
the reads data source to manage the metrics; in the future, something more general should manage the metrics classes.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4015 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-08-11 20:17:11 +00:00
parent 86bd55408e
commit 41d57b7139
56 changed files with 689 additions and 850 deletions

View File

@ -523,7 +523,7 @@ public class GenomeAnalysisEngine {
* @param argCollection The collection of arguments passed to the engine.
* @return The reads object providing reads source info.
*/
private Reads extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
private ReadProperties extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
DownsamplingMethod method = null;
if(argCollection.downsamplingType != DownsampleType.NONE)
@ -533,7 +533,7 @@ public class GenomeAnalysisEngine {
else
method = new DownsamplingMethod(DownsampleType.NONE,null,null);
return new Reads(argCollection.samFiles,
return new ReadProperties(argCollection.samFiles,
argCollection.strictnessLevel,
argCollection.readBufferSize,
method,
@ -612,7 +612,7 @@ public class GenomeAnalysisEngine {
* @param tracks a collection of the reference ordered data tracks
*/
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks) {
if ((reads == null && (tracks == null || tracks.isEmpty())) || reference == null )
if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null )
return;
// Compile a set of sequence names that exist in the reference file.
@ -623,7 +623,7 @@ public class GenomeAnalysisEngine {
referenceSequenceNames.add(dictionaryEntry.getSequenceName());
if (reads != null) {
if (!reads.isEmpty()) {
// Compile a set of sequence names that exist in the BAM files.
SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary();
@ -752,7 +752,7 @@ public class GenomeAnalysisEngine {
region.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
}
return new MonolithicShardStrategy(shardType,region);
return new MonolithicShardStrategy(readsDataSource,shardType,region);
}
ShardStrategy shardStrategy = null;
@ -764,7 +764,7 @@ public class GenomeAnalysisEngine {
if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
if (intervals != null && !intervals.isEmpty()) {
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
Utils.scareUser("Locus walkers can only walk over coordinate-sorted data. Please resort your input BAM file.");
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
@ -820,11 +820,7 @@ public class GenomeAnalysisEngine {
* @param reads the read source information
* @return A data source for the given set of reads.
*/
private SAMDataSource createReadsDataSource(Reads reads) {
// By reference traversals are happy with no reads. Make sure that case is handled.
if (reads.getReadsFiles().size() == 0)
return null;
private SAMDataSource createReadsDataSource(ReadProperties reads) {
return new SAMDataSource(reads);
}
@ -934,4 +930,12 @@ public class GenomeAnalysisEngine {
public List<ReferenceOrderedDataSource> getRodDataSources() {
return this.rodDataSources;
}
/**
* Gets cumulative metrics about the entire run to this point.
* @return cumulative metrics about the entire run.
*/
public ReadMetrics getCumulativeMetrics() {
return readsDataSource.getCumulativeReadMetrics();
}
}

View File

@ -0,0 +1,226 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk;
import net.sf.picard.filter.SamRecordFilter;
import java.util.Map;
import java.util.HashMap;
import java.util.Collections;
import org.broadinstitute.sting.utils.StingException;
/**
 * Holds a bunch of basic information about the traversal: how many
 * records and reads have been processed, how many reads were skipped
 * for various reasons, and per-filter-class counts of reads removed
 * by each read filter.
 */
public class ReadMetrics implements Cloneable {
    // Number of records (loci, reads) we've processed
    private long nRecords;
    // How many reads have we processed, along with those skipped for various reasons
    private long nReads;
    private long nSkippedReads;
    private long nUnmappedReads;
    private long nNotPrimary;
    private long nBadAlignments;
    private long nSkippedIndels;
    private long nDuplicates;
    // Maps each filter class to the number of reads it has rejected.
    private Map<Class, Long> counter = new HashMap<Class, Long>();

    /**
     * Combines these metrics with a set of other metrics, storing the results in this class.
     * @param metrics The metrics to fold into this class.
     */
    public void incrementMetrics(ReadMetrics metrics) {
        nRecords += metrics.nRecords;
        nReads += metrics.nReads;
        nSkippedReads += metrics.nSkippedReads;
        nUnmappedReads += metrics.nUnmappedReads;
        nNotPrimary += metrics.nNotPrimary;
        nBadAlignments += metrics.nBadAlignments;
        nSkippedIndels += metrics.nSkippedIndels;
        nDuplicates += metrics.nDuplicates;
        // Fold the incoming per-filter counts into ours, treating a
        // missing entry as zero.
        for(Map.Entry<Class,Long> counterEntry: metrics.counter.entrySet()) {
            Class counterType = counterEntry.getKey();
            Long existing = counter.get(counterType);
            counter.put(counterType, (existing != null ? existing : 0L) + counterEntry.getValue());
        }
    }

    /**
     * Creates a deep copy of these read metrics.
     * @return A new ReadMetrics instance, fully independent of this one.
     */
    public ReadMetrics clone() {
        ReadMetrics newMetrics;
        try {
            newMetrics = (ReadMetrics)super.clone();
        }
        catch(CloneNotSupportedException ex) {
            throw new StingException("Unable to clone runtime metrics",ex);
        }
        // super.clone() has already performed a field-by-field copy of
        // all the primitive counters; only the counter map needs an
        // independent deep copy so the clones don't share state.
        newMetrics.counter = new HashMap<Class,Long>(counter);
        return newMetrics;
    }

    /**
     * Records that the given filter rejected a read, incrementing the
     * count tracked for that filter's class.
     * @param filter The filter that rejected a read.
     */
    public void incrementFilter(SamRecordFilter filter) {
        Long count = counter.get(filter.getClass());
        counter.put(filter.getClass(), (count != null ? count : 0L) + 1L);
    }

    /**
     * Gets a read-only view of the number of reads rejected by each filter.
     * @return Unmodifiable map from filter class to rejection count.
     */
    public Map<Class,Long> getCountsByFilter() {
        return Collections.unmodifiableMap(counter);
    }

    /**
     * Gets the number of 'iterations' (one call of filter/map/reduce sequence) performed.
     * @return The number of iterations completed.
     */
    public long getNumIterations() {
        return nRecords;
    }

    /**
     * Increments the number of 'iterations' (one call of filter/map/reduce sequence) completed.
     */
    public void incrementNumIterations() {
        nRecords++;
    }

    /**
     * Gets the number of reads seen in the course of this run.
     * @return The number of reads seen.
     */
    public long getNumReadsSeen() {
        return nReads;
    }

    /**
     * Increments the number of reads seen in the course of this run.
     */
    public void incrementNumReadsSeen() {
        nReads++;
    }

    /**
     * Gets the cumulative number of reads skipped in the course of this run.
     * @return Cumulative number of reads skipped in the course of this run.
     */
    public long getNumSkippedReads() {
        return nSkippedReads;
    }

    /**
     * Increments the cumulative number of reads skipped in the course of this run.
     */
    public void incrementNumSkippedReads() {
        nSkippedReads++;
    }

    /**
     * Gets the number of unmapped reads skipped in the course of this run.
     * @return The number of unmapped reads skipped.
     */
    public long getNumUnmappedReads() {
        return nUnmappedReads;
    }

    /**
     * Increments the number of unmapped reads skipped in the course of this run.
     */
    public void incrementNumUnmappedReads() {
        nUnmappedReads++;
    }

    /**
     * Gets the number of non-primary-alignment reads skipped in the course of this run.
     * @return The number of non-primary reads skipped.
     */
    public long getNumNonPrimaryReads() {
        return nNotPrimary;
    }

    /**
     * Increments the number of non-primary-alignment reads skipped in the course of this run.
     */
    public void incrementNumNonPrimaryReads() {
        nNotPrimary++;
    }

    /**
     * Gets the number of reads skipped due to bad alignments in the course of this run.
     * @return The number of badly-aligned reads skipped.
     */
    public long getNumBadAlignments() {
        return nBadAlignments;
    }

    /**
     * Increments the number of reads skipped due to bad alignments in the course of this run.
     */
    public void incrementNumBadAlignments() {
        nBadAlignments++;
    }

    /**
     * Gets the number of indel-containing reads skipped in the course of this run.
     * @return The number of skipped indel reads.
     */
    public long getNumSkippedIndels() {
        return nSkippedIndels;
    }

    /**
     * Increments the number of indel-containing reads skipped in the course of this run.
     */
    public void incrementNumSkippedIndels() {
        nSkippedIndels++;
    }

    /**
     * Gets the number of duplicate reads skipped in the course of this run.
     * @return The number of duplicate reads skipped.
     */
    public long getNumDuplicates() {
        return nDuplicates;
    }

    /**
     * Increments the number of duplicate reads skipped in the course of this run.
     */
    public void incrementNumDuplicates() {
        nDuplicates++;
    }
}

View File

@ -25,7 +25,7 @@ import java.util.Collection;
* A data structure containing information about the reads data sources as well as
* information about how they should be downsampled, sorted, and filtered.
*/
public class Reads {
public class ReadProperties {
private List<File> readsFiles = null;
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
private Integer readBufferSize = null;
@ -115,7 +115,7 @@ public class Reads {
* Simple constructor for unit testing.
* @param readsFiles List of reads files to open.
*/
public Reads( List<File> readsFiles ) {
public ReadProperties( List<File> readsFiles ) {
this.readsFiles = readsFiles;
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.NONE,null,null);
this.supplementalFilters = new ArrayList<SamRecordFilter>();
@ -138,7 +138,7 @@ public class Reads {
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
* bases will be seen in the pileups, and the deletions will be skipped silently.
*/
Reads( List<File> samFiles,
ReadProperties( List<File> samFiles,
SAMFileReader.ValidationStringency strictness,
Integer readBufferSize,
DownsamplingMethod downsamplingMethod,

View File

@ -435,11 +435,14 @@ public class WalkerManager extends PluginManager<Walker> {
* @param walkerClass Class of the walker to inspect.
* @return An array of types extending from SamRecordFilter. Will never be null.
*/
@SuppressWarnings("unchecked")
public static Class<? extends SamRecordFilter>[] getReadFilterTypes(Class<? extends Walker> walkerClass) {
if( !walkerClass.isAnnotationPresent(ReadFilters.class) )
return new Class[0];
return walkerClass.getAnnotation(ReadFilters.class).value();
public static Collection<Class<? extends SamRecordFilter>> getReadFilterTypes(Class<?> walkerClass) {
Set<Class<? extends SamRecordFilter>> filterTypes = new HashSet<Class<? extends SamRecordFilter>>();
while(walkerClass != null) {
if(walkerClass.isAnnotationPresent(ReadFilters.class))
filterTypes.addAll(Arrays.asList(walkerClass.getAnnotation(ReadFilters.class).value()));
walkerClass = walkerClass.getSuperclass();
}
return filterTypes;
}
/**
@ -447,7 +450,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @param walker The walker to inspect.
* @return An array of types extending from SamRecordFilter. Will never be null.
*/
public static Class<? extends SamRecordFilter>[] getReadFilterTypes(Walker walker) {
public static Collection<Class<? extends SamRecordFilter>> getReadFilterTypes(Walker walker) {
return getReadFilterTypes(walker.getClass());
}
}

View File

@ -4,7 +4,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import java.util.Collection;
@ -20,7 +20,7 @@ public class LocusShardDataProvider extends ShardDataProvider {
/**
* Information about the source of the read data.
*/
private final Reads sourceInfo;
private final ReadProperties sourceInfo;
/**
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
@ -37,7 +37,7 @@ public class LocusShardDataProvider extends ShardDataProvider {
* @param shard The chunk of data over which traversals happen.
* @param reference A getter for a section of the reference.
*/
public LocusShardDataProvider(Shard shard, Reads sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,reference,rods);
this.sourceInfo = sourceInfo;
this.locus = locus;
@ -48,7 +48,7 @@ public class LocusShardDataProvider extends ShardDataProvider {
* Returns information about the source of the reads.
* @return Info about the source of the reads.
*/
public Reads getSourceInfo() {
public ReadProperties getSourceInfo() {
return sourceInfo;
}

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
@ -36,7 +36,7 @@ public abstract class LocusView extends LocusIterator implements View {
/**
* Source info for this view. Informs the class about downsampling requirements.
*/
private Reads sourceInfo;
private ReadProperties sourceInfo;
/**
* The actual locus context iterator.

View File

@ -139,6 +139,9 @@ public abstract class ShardDataProvider {
// Explicitly purge registered views to ensure that we don't end up with circular references
// to views, which can in turn hold state.
registeredViews.clear();
if(shard != null)
shard.close();
}
@Override

View File

@ -3,7 +3,10 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import java.util.List;
import java.util.Map;
@ -19,6 +22,11 @@ import net.sf.picard.filter.SamRecordFilter;
* @date Apr 7, 2009
*/
public class LocusShard implements BAMFormatAwareShard {
/**
* Source for read data.
*/
private SAMDataSource dataSource;
/**
* A list of the chunks associated with this shard.
*/
@ -27,16 +35,30 @@ public class LocusShard implements BAMFormatAwareShard {
// currently our location
private final List<GenomeLoc> loci;
/**
* Statistics about which reads in this shards were used and which were filtered away.
*/
private final ReadMetrics readMetrics = new ReadMetrics();
/**
* Create a new locus shard, divided by index.
* @param intervals List of intervals to process.
* @param fileSpans File spans associated with that interval.
*/
public LocusShard(List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
public LocusShard(SAMDataSource dataSource, List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
this.dataSource = dataSource;
this.loci = intervals;
this.fileSpans = fileSpans;
}
/**
* Closes the shard, tallying and incorporating read data.
*/
@Override
public void close() {
dataSource.incorporateReadMetrics(readMetrics);
}
/**
* Gets the file spans associated with this locus shard.
* @return A list of the file spans to use when retrieving locus data.
@ -104,6 +126,24 @@ public class LocusShard implements BAMFormatAwareShard {
return ShardType.LOCUS;
}
/**
* Gets key read validation and filtering properties.
* @return set of read properties associated with this shard.
*/
@Override
public ReadProperties getReadProperties() {
return dataSource.getReadsInfo();
}
/**
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
* @return Storage space for metrics.
*/
@Override
public ReadMetrics getReadMetrics() {
return readMetrics;
}
/**
* String representation of this shard.
* @return A string representation of the boundaries of this shard.

View File

@ -29,8 +29,10 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.ReadProperties;
import java.util.*;
import java.io.File;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
@ -57,7 +59,8 @@ public class LocusShardStrategy implements ShardStrategy {
* @param locations List of locations for which to load data.
*/
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocSortedSet locations) {
if(reads != null) {
this.reads = reads;
if(!reads.isEmpty()) {
List<GenomeLoc> intervals;
if(locations == null) {
// If no locations were passed in, shard the entire BAM file.
@ -77,12 +80,10 @@ public class LocusShardStrategy implements ShardStrategy {
else
intervals = locations.toList();
this.reads = reads;
this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals);
}
else {
final int maxShardSize = 100000;
this.reads = null;
List<FilePointer> filePointers = new ArrayList<FilePointer>();
if(locations == null) {
for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) {
@ -118,7 +119,7 @@ public class LocusShardStrategy implements ShardStrategy {
public LocusShard next() {
FilePointer nextFilePointer = filePointerIterator.next();
Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null;
return new LocusShard(nextFilePointer.locations,fileSpansBounding);
return new LocusShard(reads,nextFilePointer.locations,fileSpansBounding);
}
/** we don't support the remove command */

View File

@ -2,14 +2,11 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
/**
* A single, monolithic shard bridging all available data.
@ -17,6 +14,11 @@ import net.sf.samtools.SAMSequenceRecord;
* @version 0.1
*/
public class MonolithicShard implements Shard {
/**
* Reads data, if applicable.
*/
private final SAMDataSource readsDataSource;
/**
* What type of MonolithicShard is this? Read or locus?
*/
@ -27,18 +29,32 @@ public class MonolithicShard implements Shard {
*/
private final List<GenomeLoc> locs;
/**
* Statistics about which reads in this shards were used and which were filtered away.
*/
private final ReadMetrics readMetrics = new ReadMetrics();
/**
* Creates a new monolithic shard of the given type.
* @param shardType Type of the shard. Must be either read or locus; cannot be intervalic.
* @param locs Intervals that this monolithic shard should process.
*/
public MonolithicShard(ShardType shardType, List<GenomeLoc> locs) {
public MonolithicShard(SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) {
this.readsDataSource = readsDataSource;
if(shardType != ShardType.LOCUS && shardType != ShardType.READ)
throw new StingException("Invalid shard type for monolithic shard: " + shardType);
this.shardType = shardType;
this.locs = locs;
}
/**
* Closes the shard, tallying and incorporating read data.
*/
@Override
public void close() {
readsDataSource.incorporateReadMetrics(readMetrics);
}
/**
* Returns null, indicating that (in this case) the entire genome is covered.
* @return null.
@ -51,10 +67,29 @@ public class MonolithicShard implements Shard {
* Reports the type of monolithic shard.
* @return Type of monolithic shard.
*/
@Override
public ShardType getShardType() {
return shardType;
}
/**
* Gets key read validation and filtering properties.
* @return set of read properties associated with this shard.
*/
@Override
public ReadProperties getReadProperties() {
return readsDataSource.getReadsInfo();
}
/**
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
* @return Storage space for metrics.
*/
@Override
public ReadMetrics getReadMetrics() {
return readMetrics;
}
/**
* String representation of this shard.
* @return "entire genome".

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import java.util.Iterator;
import java.util.NoSuchElementException;
@ -25,8 +27,8 @@ public class MonolithicShardStrategy implements ShardStrategy {
* Create a new shard strategy for shards of the given type.
* @param shardType The shard type.
*/
public MonolithicShardStrategy(final Shard.ShardType shardType, final List<GenomeLoc> region) {
shard = new MonolithicShard(shardType,region);
public MonolithicShardStrategy(final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) {
shard = new MonolithicShard(readsDataSource,shardType,region);
}
/**

View File

@ -1,10 +1,12 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import java.util.*;
@ -35,10 +37,7 @@ import net.sf.picard.filter.SamRecordFilter;
* @version 0.1
*/
public class ReadShard implements BAMFormatAwareShard {
/**
* Information about the origins of reads.
*/
private final Reads sourceInfo;
private final SAMDataSource readsDataSource;
/**
* The data backing the next chunks to deliver to the traversal engine.
@ -50,17 +49,30 @@ public class ReadShard implements BAMFormatAwareShard {
*/
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(ReadShardStrategy.MAX_READS);
/**
* Statistics about which reads in this shards were used and which were filtered away.
*/
private final ReadMetrics readMetrics = new ReadMetrics();
/**
* The filter to be applied to all reads meeting this criteria.
*/
private final SamRecordFilter filter;
public ReadShard(Reads sourceInfo, Map<SAMReaderID,SAMFileSpan> fileSpans, SamRecordFilter filter) {
this.sourceInfo = sourceInfo;
public ReadShard(SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, SamRecordFilter filter) {
this.readsDataSource = readsDataSource;
this.fileSpans = fileSpans;
this.filter = filter;
}
/**
* Closes the shard, tallying and incorporating read data.
*/
@Override
public void close() {
readsDataSource.incorporateReadMetrics(readMetrics);
}
/**
* Get the list of chunks delimiting this shard.
* @return a list of chunks that contain data for this shard.
@ -121,7 +133,7 @@ public class ReadShard implements BAMFormatAwareShard {
*/
@Override
public StingSAMIterator iterator() {
return StingSAMIteratorAdapter.adapt(sourceInfo,reads.iterator());
return StingSAMIteratorAdapter.adapt(reads.iterator());
}
@Override
@ -137,7 +149,25 @@ public class ReadShard implements BAMFormatAwareShard {
@Override
public ShardType getShardType() {
return ShardType.READ;
}
}
/**
* Gets key read validation and filtering properties.
* @return set of read properties associated with this shard.
*/
@Override
public ReadProperties getReadProperties() {
return readsDataSource.getReadsInfo();
}
/**
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
* @return Storage space for metrics.
*/
@Override
public ReadMetrics getReadMetrics() {
return readMetrics;
}
/**
* String representation of this shard.

View File

@ -138,7 +138,7 @@ public class ReadShardStrategy implements ShardStrategy {
if(selectedReaders.size() > 0) {
filter = new ReadOverlapFilter(currentFilePointer.locations);
BAMFormatAwareShard shard = new ReadShard(dataSource.getReadsInfo(),selectedReaders,filter);
BAMFormatAwareShard shard = new ReadShard(dataSource,selectedReaders,filter);
dataSource.fillShard(shard);
if(!shard.isBufferEmpty()) {
@ -152,7 +152,7 @@ public class ReadShardStrategy implements ShardStrategy {
}
}
else {
BAMFormatAwareShard shard = new ReadShard(dataSource.getReadsInfo(),position,filter);
BAMFormatAwareShard shard = new ReadShard(dataSource,position,filter);
dataSource.fillShard(shard);
nextShard = !shard.isBufferEmpty() ? shard : null;
}

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import java.io.Serializable;
import java.util.List;
@ -42,4 +44,21 @@ public interface Shard extends Serializable {
* @return ShardType, indicating the type
*/
public ShardType getShardType();
/**
* Does any releasing / aggregation required when the shard is through being processed.
*/
public void close();
/**
* Gets required configuration for validating and filtering reads.
* @return read configuration properties.
*/
public ReadProperties getReadProperties();
/**
* Gets the runtime metrics associated with this shard.
* @return metrics and read counts.
*/
public ReadMetrics getReadMetrics();
}

View File

@ -37,10 +37,10 @@ import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
@ -55,7 +55,12 @@ import java.util.*;
*/
public class SAMDataSource implements SimpleDataSource {
/** Backing support for reads. */
protected final Reads reads;
protected final ReadProperties readProperties;
/**
* Runtime metrics of reads filtered, etc.
*/
protected final ReadMetrics readMetrics;
/**
* Identifiers for the readers driving this data source.
@ -91,11 +96,6 @@ public class SAMDataSource implements SimpleDataSource {
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(SAMDataSource.class);
/**
* A histogram of exactly what reads were removed from the input stream and why.
*/
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
/**
* A collection of readers driving the merging process.
*/
@ -105,13 +105,10 @@ public class SAMDataSource implements SimpleDataSource {
* Create a new SAM data source given the supplied read metadata.
* @param reads The read metadata.
*/
public SAMDataSource(Reads reads) {
this.reads = reads;
public SAMDataSource(ReadProperties reads) {
this.readProperties = reads;
this.readMetrics = new ReadMetrics();
// check the length
if (reads.getReadsFiles().size() < 1) {
throw new SimpleDataSourceLoadException("SAMDataSource: you must provide a list of length greater then 0");
}
for (File smFile : reads.getReadsFiles()) {
if (!smFile.canRead()) {
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
@ -166,7 +163,15 @@ public class SAMDataSource implements SimpleDataSource {
* information about how they are downsampled, sorted, and filtered
* @return
*/
public Reads getReadsInfo() { return reads; }
public ReadProperties getReadsInfo() { return readProperties; }
/**
* Checks to see whether any reads files are supplying data.
* @return True if no reads files are supplying data to the traversal; false otherwise.
*/
public boolean isEmpty() {
return readProperties.getReadsFiles().size() == 0;
}
/**
* Gets the SAM file associated with a given reader ID.
@ -263,11 +268,23 @@ public class SAMDataSource implements SimpleDataSource {
}
/**
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
* @return Histogram of reads. Will not be null.
* Gets the cumulative read metrics for shards already processed.
* @return Cumulative read metrics.
*/
public SAMReadViolationHistogram getViolationHistogram() {
return violations;
public ReadMetrics getCumulativeReadMetrics() {
synchronized(readMetrics) {
return readMetrics.clone();
}
}
/**
* Incorporate the given read metrics into the cumulative read metrics.
* @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics.
*/
public void incorporateReadMetrics(final ReadMetrics readMetrics) {
synchronized(this.readMetrics) {
this.readMetrics.incrementMetrics(readMetrics);
}
}
/**
@ -361,18 +378,19 @@ public class SAMDataSource implements SimpleDataSource {
if(shard.getFileSpans().get(id) == null)
continue;
CloseableIterator<SAMRecord> iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
if(reads.getReadBufferSize() != null)
iterator = new BufferingReadIterator(iterator,reads.getReadBufferSize());
if(readProperties.getReadBufferSize() != null)
iterator = new BufferingReadIterator(iterator,readProperties.getReadBufferSize());
if(shard.getFilter() != null)
iterator = new FilteringIterator(iterator,shard.getFilter()); // not a counting iterator because we don't want to show the filtering of reads
mergingIterator.addIterator(readers.getReader(id),iterator);
}
return applyDecoratingIterators(enableVerification,
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(reads,mergingIterator)),
reads.getDownsamplingMethod().toFraction,
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
reads.getSupplementalFilters());
return applyDecoratingIterators(shard.getReadMetrics(),
enableVerification,
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters());
}
/**
@ -389,11 +407,12 @@ public class SAMDataSource implements SimpleDataSource {
for(SAMReaderID id: getReaderIDs())
mergingIterator.addIterator(readers.getReader(id),readers.getReader(id).iterator());
return applyDecoratingIterators(shard instanceof ReadShard,
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(reads,mergingIterator)),
reads.getDownsamplingMethod().toFraction,
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
reads.getSupplementalFilters());
return applyDecoratingIterators(shard.getReadMetrics(),
shard instanceof ReadShard,
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters());
}
/**
@ -411,6 +430,7 @@ public class SAMDataSource implements SimpleDataSource {
/**
* Filter reads based on user-specified criteria.
*
* @param readMetrics metrics to track when using this iterator.
* @param enableVerification Verify the order of reads.
* @param wrappedIterator the raw data source.
* @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus).
@ -418,12 +438,12 @@ public class SAMDataSource implements SimpleDataSource {
* @param supplementalFilters additional filters to apply to the reads.
* @return An iterator wrapped with filters reflecting the passed-in parameters. Will not be null.
*/
protected StingSAMIterator applyDecoratingIterators(boolean enableVerification,
protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics,
boolean enableVerification,
StingSAMIterator wrappedIterator,
Double downsamplingFraction,
Boolean noValidationOfReadOrder,
Collection<SamRecordFilter> supplementalFilters) {
wrappedIterator = new MalformedSAMFilteringIterator(getHeader(),wrappedIterator,violations );
wrappedIterator = new ReadFormattingIterator(wrappedIterator);
// NOTE: this (and other filtering) should be done before on-the-fly sorting
@ -436,9 +456,7 @@ public class SAMDataSource implements SimpleDataSource {
if (!noValidationOfReadOrder && enableVerification)
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
for( SamRecordFilter supplementalFilter: supplementalFilters )
wrappedIterator = StingSAMIteratorAdapter.adapt(wrappedIterator.getSourceInfo(),
new CountingFilteringIterator(wrappedIterator,supplementalFilter));
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
return wrappedIterator;
}
@ -511,7 +529,7 @@ public class SAMDataSource implements SimpleDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new StingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(reads);
SAMReaders readers = new SAMReaders(readProperties);
allResources.add(readers);
availableResources.add(readers);
}
@ -531,7 +549,7 @@ public class SAMDataSource implements SimpleDataSource {
* Derive a new set of readers from the Reads metadata.
* @param sourceInfo Metadata for the reads to load.
*/
public SAMReaders(Reads sourceInfo) {
public SAMReaders(ReadProperties sourceInfo) {
for(File readsFile: sourceInfo.getReadsFiles()) {
SAMFileReader reader = new SAMFileReader(readsFile);
reader.enableFileSource(true);
@ -616,10 +634,6 @@ public class SAMDataSource implements SimpleDataSource {
*/
private final StingSAMIterator wrappedIterator;
public Reads getSourceInfo() {
return wrappedIterator.getSourceInfo();
}
public ReleasingIterator(SAMReaders resource, StingSAMIterator wrapped) {
this.resource = resource;
this.wrappedIterator = wrapped;

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
@ -192,7 +193,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
outputTracker.bypassThreadLocalStorage(true);
try {
walker.onTraversalDone(result);
printOnTraversalDone(result);
printOnTraversalDone(result,engine.getCumulativeMetrics());
}
finally {
outputTracker.bypassThreadLocalStorage(false);

View File

@ -12,12 +12,9 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import java.util.Collection;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.reference.IndexedFastaSequenceFile;
@ -55,7 +52,7 @@ public class LinearMicroScheduler extends MicroScheduler {
// New experimental code for managing locus intervals.
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(getReadIterator(shard), shard.getGenomeLocs(), walker.getMandatoryReadFilters(), lWalker.getDiscards());
WindowMaker windowMaker = new WindowMaker(shard, getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards());
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
@ -74,7 +71,7 @@ public class LinearMicroScheduler extends MicroScheduler {
Object result = accumulator.finishTraversal();
printOnTraversalDone(result);
printOnTraversalDone(result,engine.getCumulativeMetrics());
outputTracker.close();

View File

@ -35,9 +35,9 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.utils.StingException;
import java.util.*;
@ -147,23 +147,15 @@ public abstract class MicroScheduler {
* @return an iterator over the reads specified in the shard.
*/
protected StingSAMIterator getReadIterator(Shard shard) {
return (reads != null) ? reads.seek(shard) : new NullSAMIterator(new Reads(new ArrayList<File>()));
return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(new ArrayList<File>()));
}
/**
* Print summary information for the analysis.
* @param sum The final reduce output.
*/
protected void printOnTraversalDone(Object sum) {
// HACK: The microscheduler should be too dumb to know anything about the data
// it's actually processing; it should just funnel anything it receives
// to the traversal engine.
// TODO: Implement code to allow the datasources to print summary info of the
// data they've seen.
if( reads != null && reads.getViolationHistogram().getViolationCount() > 0 )
logger.warn(String.format("%n%s",reads.getViolationHistogram()));
traversalEngine.printOnTraversalDone(sum);
protected void printOnTraversalDone(Object sum, ReadMetrics metrics) {
traversalEngine.printOnTraversalDone(metrics);
}
/**

View File

@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.StingException;
import java.util.concurrent.Callable;
@ -48,7 +47,7 @@ public class ShardTraverser implements Callable {
TraversalEngine traversalEngine,
Walker walker,
Shard shard,
ThreadLocalOutputTracker outputTracker ) {
ThreadLocalOutputTracker outputTracker) {
this.microScheduler = microScheduler;
this.walker = walker;
this.traversalEngine = traversalEngine;
@ -62,7 +61,7 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(microScheduler.getReadIterator(shard),shard.getGenomeLocs(),walker.getMandatoryReadFilters(),lWalker.getDiscards());
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards());
ShardDataProvider dataProvider = null;
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {

View File

@ -1,21 +1,14 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.picard.util.PeekableIterator;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
/**
* Buffer shards of data which may or may not contain multiple loci into
@ -29,7 +22,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
/**
* Source information for iteration.
*/
private final Reads sourceInfo;
private final ReadProperties sourceInfo;
/**
* Hold the read iterator so that it can be closed later.
@ -61,14 +54,13 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* the given intervals.
* @param iterator The data source for this window.
* @param intervals The set of intervals over which to traverse.
* @param discards a filter at that indicates read position relative to some locus?
*/
public WindowMaker(StingSAMIterator iterator, List<GenomeLoc> intervals, List<SamRecordFilter> filters, List<LocusIteratorFilter> discards ) {
this.sourceInfo = iterator.getSourceInfo();
public WindowMaker(Shard shard, StingSAMIterator iterator, List<GenomeLoc> intervals, List<LocusIteratorFilter> discards ) {
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
LocusIterator locusIterator;
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
locusIterator = new LocusIteratorByState(wrappedIterator,sourceInfo,discards);
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,discards);
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
@ -108,7 +100,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
seedNextLocus();
}
public Reads getSourceInfo() {
public ReadProperties getSourceInfo() {
return sourceInfo;
}

View File

@ -27,45 +27,38 @@ import net.sf.samtools.util.CloserUtil;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Collection;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.ReadMetrics;
/**
* Filtering Iterator which takes a filter and an iterator and iterates
* through only those records which are not rejected by the filter.
*
* Just a copy of a unmodifiable FilteringIterator from Picard
*
* @author Mark DePristo
*/
public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
private final ReadMetrics runtimeMetrics;
private final Iterator<SAMRecord> iterator;
private final SamRecordFilter filter;
private final Collection<SamRecordFilter> filters;
private SAMRecord next = null;
/**
* Constructor
*
* @param metrics metrics to accumulate on the nature of filtered reads.
* @param iterator the backing iterator
* @param filter the filter (which may be a FilterAggregator)
* @param filters the filter (which may be a FilterAggregator)
*/
public CountingFilteringIterator(Iterator<SAMRecord> iterator, SamRecordFilter filter) {
public CountingFilteringIterator(ReadMetrics metrics, Iterator<SAMRecord> iterator, Collection<SamRecordFilter> filters) {
this.runtimeMetrics = metrics;
this.iterator = iterator;
this.filter = filter;
this.filters = filters;
next = getNextRecord();
}
/**
* Special case to count passing records
* @param iterator
*/
public CountingFilteringIterator(Iterator<SAMRecord> iterator) {
this(iterator, null);
}
/**
* Returns true if the iteration has more elements.
*
@ -111,15 +104,18 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
private SAMRecord getNextRecord() {
while (iterator.hasNext()) {
SAMRecord record = iterator.next();
runtimeMetrics.incrementNumReadsSeen();
if ( filter == null ) {
TraversalStatistics.nReads++;
return record;
} else if (!filter.filterOut(record)) {
return record;
} else {
TraversalStatistics.incrementFilter(filter);
boolean filtered = false;
for(SamRecordFilter filter: filters) {
if(filter.filterOut(record)) {
runtimeMetrics.incrementFilter(filter);
filtered = true;
break;
}
}
if(!filtered) return record;
}
return null;

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
/**
* Created by IntelliJ IDEA.

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2009 The Broad Institute
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
@ -11,7 +12,6 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@ -22,77 +22,84 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
/**
* Validates reads against a specific set of criteria. If it finds a
* read that fails to meet the given criteria, it will throw an exception.
* The caller can decide whether to ignore the error, hide the read
* from the user, or blow up in a spectacular ball of fire.
* Filter out malformed reads.
*
* @author hanna
* @author mhanna
* @version 0.1
*/
public class SAMReadValidator {
/**
* Validate the sam read against a list of criteria that are known to cause failures in the GATK.
* Throw an exception if the read fails.
* @param read the read to validate. Must not be null.
*/
public static void validate( SAMFileHeader header, SAMRecord read ) throws SAMReadValidationException {
checkInvalidAlignmentStart(read);
checkInvalidAlignmentEnd(read);
checkAlignmentDisagreesWithHeader(header,read);
checkCigarDisagreesWithAlignment(read);
public class MalformedReadFilter implements SamRecordFilter {
public boolean filterOut(SAMRecord read) {
return !checkInvalidAlignmentStart(read) ||
!checkInvalidAlignmentEnd(read) ||
!checkAlignmentDisagreesWithHeader(GenomeAnalysisEngine.instance.getSAMFileHeader(),read) ||
!checkCigarDisagreesWithAlignment(read);
}
/**
* Check for the case in which the alignment start is inconsistent with the read unmapped flag.
* @param read The read to validate.
* @return true if read start is valid, false otherwise.
*/
private static void checkInvalidAlignmentStart( SAMRecord read ) {
private static boolean checkInvalidAlignmentStart( SAMRecord read ) {
// read is not flagged as 'unmapped', but alignment start is NO_ALIGNMENT_START
if( !read.getReadUnmappedFlag() && read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START )
throw new SAMReadValidationException("read is not flagged as 'unmapped', but alignment start is NO_ALIGNMENT_START");
return false;
// Read is not flagged as 'unmapped', but alignment start is -1
if( !read.getReadUnmappedFlag() && read.getAlignmentStart() == -1 )
throw new SAMReadValidationException("Read is not flagged as 'unmapped', but alignment start is -1");
return false;
return true;
}
/**
* Check for invalid end of alignments.
* @param read The read to validate.
* @return true if read end is valid, false otherwise.
*/
private static void checkInvalidAlignmentEnd( SAMRecord read ) {
private static boolean checkInvalidAlignmentEnd( SAMRecord read ) {
// Alignment ends prior to its beginning
if( !read.getReadUnmappedFlag() && read.getAlignmentEnd() != -1 && read.getAlignmentEnd() < read.getAlignmentStart() )
throw new SAMReadValidationException("Alignment ends prior to its beginning");
return false;
return true;
}
/**
* Check to ensure that the alignment makes sense based on the contents of the header.
* @param header The SAM file header.
* @param read The read to verify.
* @return true if alignment agrees with header, false othrewise.
*/
private static void checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) {
private static boolean checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) {
// Read is aligned to nonexistent contig
if( read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START )
throw new SAMReadValidationException("Read is aligned to nonexistent contig");
return false;
SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() );
// Read is aligned to a point after the end of the contig
if( !read.getReadUnmappedFlag() && read.getAlignmentStart() > contigHeader.getSequenceLength() )
throw new SAMReadValidationException("Read is aligned to a point after the end of the contig");
return false;
return true;
}
/**
* Check for inconsistencies between the cigar string and the
* Check for inconsistencies between the cigar string and the
* @param read The read to validate.
* @return true if cigar agrees with alignment, false otherwise.
*/
private static void checkCigarDisagreesWithAlignment( SAMRecord read ) {
private static boolean checkCigarDisagreesWithAlignment(SAMRecord read) {
// Read has a valid alignment start, but the CIGAR string is empty
if( !read.getReadUnmappedFlag() &&
read.getAlignmentStart() != -1 &&
read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START &&
read.getAlignmentBlocks().size() == 0 )
throw new SAMReadValidationException("Read has a valid alignment start, but the CIGAR string is empty");
return false;
return true;
}
}

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
/**
* Created by IntelliJ IDEA.

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
/**
* Created by IntelliJ IDEA.

View File

@ -6,7 +6,7 @@ import net.sf.picard.sam.MergingSamRecordIterator;
import java.util.Iterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
/*
* Copyright (c) 2009 The Broad Institute
@ -76,15 +76,6 @@ public class BoundedReadIterator implements StingSAMIterator {
this.doNotUseThatUnmappedReadPile = useThem;
}
/**
* Retrieves information about reads sources.
* @return Info about the sources of reads.
*/
public Reads getSourceInfo() {
return iterator.getSourceInfo();
}
public SAMFileHeader getHeader() {
// todo: this is bad, we need an iterface out there for samrecords that supports getting the header,
// regardless of the merging

View File

@ -5,7 +5,7 @@ import net.sf.samtools.SAMRecord;
import java.util.Iterator;
import java.util.Random;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
public class DownsampleIterator implements StingSAMIterator {
@ -22,15 +22,6 @@ public class DownsampleIterator implements StingSAMIterator {
next = getNextRecord();
}
/**
* Retrieves information about reads sources.
* @return Info about the sources of reads.
*/
public Reads getSourceInfo() {
return it.getSourceInfo();
}
public boolean hasNext() {
return next != null;
}

View File

@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.*;
import net.sf.picard.util.PeekableIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.DownsampleType;
@ -264,7 +264,7 @@ public class LocusIteratorByState extends LocusIterator {
//final boolean DEBUG = false;
//final boolean DEBUG2 = false && DEBUG;
private Reads readInfo;
private ReadProperties readInfo;
private AlignmentContext nextAlignmentContext;
private List<LocusIteratorFilter> filters = new ArrayList<LocusIteratorFilter>();
@ -273,11 +273,11 @@ public class LocusIteratorByState extends LocusIterator {
// constructors and other basic operations
//
// -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, Reads readInformation ) {
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation ) {
this(samIterator, readInformation, NO_FILTERS);
}
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, Reads readInformation, List<LocusIteratorFilter> filters ) {
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, List<LocusIteratorFilter> filters ) {
// Aggregate all sample names.
// TODO: Push in header via constructor
if(GenomeAnalysisEngine.instance != null && GenomeAnalysisEngine.instance.getDataSource() != null) {

View File

@ -1,141 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.sam.SAMReadValidator;
import org.broadinstitute.sting.utils.sam.SAMReadValidationException;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.util.NoSuchElementException;
/**
* A decorating iterator that examines the stream of reads, discarding those
* that fail to meet a minimum standard for consumption by the GATK.
*
* @author hanna
* @version 0.1
*/
public class MalformedSAMFilteringIterator implements StingSAMIterator {
/**
* The header to validate reads against.
*/
private SAMFileHeader header = null;
/**
* The wrapped iterator. Get reads from here.
*/
private StingSAMIterator wrapped = null;
/**
* Collector for SAM read violations.
*/
private SAMReadViolationHistogram violations = null;
/**
* The next SAMRecord to return.;
*/
private SAMRecord next = null;
/**
* Creates a new MalformedSAMFilteringIterator, and provides a collector for the count
* @param wrapped The wrapped iterator to use as backing data.
* @param violations A structure to hold a breakdown of validator violations.
*/
public MalformedSAMFilteringIterator( SAMFileHeader header, StingSAMIterator wrapped, SAMReadViolationHistogram violations ) {
this.header = header;
this.wrapped = wrapped;
this.violations = violations;
seedNext();
}
/**
* Returns source information about the reads.
* @return
*/
public Reads getSourceInfo() {
return wrapped.getSourceInfo();
}
/**
* Gets an iterator, helpful for foreach loops.
* @return An iterator sharing the same state variables as the current iterator.
*/
public StingSAMIterator iterator() {
return this;
}
/**
* Checks to see whether there's a
* @return True if a next is available, false otherwise.
*/
public boolean hasNext() {
return next != null;
}
/**
* Gets the next valid record from the stream.
* @return Next valid record.
*/
public SAMRecord next() {
SAMRecord current = next;
if( current == null )
throw new NoSuchElementException("MalformedSAMFilteringIterator: supply of reads is exhausted.");
seedNext();
return current;
}
/**
* Closes the wrapped iterator.
*/
public void close() {
wrapped.close();
}
/**
* Looks ahead for the next valid SAMRecord.
*/
protected void seedNext() {
next = null;
while( wrapped.hasNext() && next == null ) {
SAMRecord toTest = wrapped.next();
try {
SAMReadValidator.validate(header,toTest);
next = toTest;
}
catch ( SAMReadValidationException ex ) {
violations.addViolation(ex);
}
}
}
/**
* Throws an exception. Remove is not supported.
*/
public void remove() { throw new UnsupportedOperationException("Unable to remove from a StingSAMIterator"); }
}

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
@ -22,11 +22,10 @@ import java.util.NoSuchElementException;
* A placeholder for an iterator with no data.
*/
public class NullSAMIterator implements StingSAMIterator {
private Reads reads = null;
private ReadProperties reads = null;
public NullSAMIterator( Reads reads ) { this.reads = reads; }
public NullSAMIterator( ReadProperties reads ) { this.reads = reads; }
public Reads getSourceInfo() { return reads; }
public Iterator<SAMRecord> iterator() { return this; }
public void close() { /* NO-OP */ }

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
@ -35,11 +35,6 @@ import net.sf.samtools.util.CloseableIterator;
* @version 0.1
*/
public class PositionTrackingIterator implements StingSAMIterator {
/**
* Source information about the reads.
*/
private Reads sourceInfo;
/**
* The iterator being tracked.
*/
@ -50,13 +45,6 @@ public class PositionTrackingIterator implements StingSAMIterator {
*/
private long position;
/**
* {@inheritDoc}
*/
public Reads getSourceInfo() {
return sourceInfo;
}
/**
* Retrieves the current position of the iterator. The 'current position' of the iterator is defined as
* the coordinate of the read that will be returned if next() is called.
@ -69,12 +57,10 @@ public class PositionTrackingIterator implements StingSAMIterator {
/**
* Create a new iterator wrapping the given position, assuming that the reader is <code>position</code> reads
* into the sequence.
* @param sourceInfo Information about where these reads came from.
* @param iterator Iterator to wraps.
* @param position Non-negative position where the iterator currently sits.
*/
public PositionTrackingIterator( Reads sourceInfo, CloseableIterator<SAMRecord> iterator, long position ) {
this.sourceInfo = sourceInfo;
public PositionTrackingIterator(CloseableIterator<SAMRecord> iterator, long position ) {
this.iterator = iterator;
this.position = position;
}

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMTag;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.apache.log4j.Logger;
@ -35,14 +35,6 @@ public class ReadFormattingIterator implements StingSAMIterator {
this.wrappedIterator = wrappedIterator;
}
/**
* Get metadata about the reads' sources, etc.
* @return Source info about the reads.
*/
public Reads getSourceInfo() {
return wrappedIterator.getSourceInfo();
}
/**
* Convenience function for use in foreach loops. Dangerous because it does not actually
* reset the iterator.

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
/**
*
* User: aaron
@ -29,10 +29,4 @@ import org.broadinstitute.sting.gatk.Reads;
* This is the standard interface for all iterators in the Sting package that iterate over SAMRecords
*/
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
/**
* Gets source information for the reads. Contains information about the original reads
* files, plus information about downsampling, etc.
* @return
*/
public Reads getSourceInfo();
}

View File

@ -5,7 +5,7 @@ import net.sf.samtools.util.CloseableIterator;
import java.util.Iterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.utils.StingException;
/**
@ -36,12 +36,12 @@ import org.broadinstitute.sting.utils.StingException;
*/
public class StingSAMIteratorAdapter {
public static StingSAMIterator adapt(Reads sourceInfo, Iterator<SAMRecord> iter) {
return new PrivateStringSAMIterator(sourceInfo, iter);
public static StingSAMIterator adapt(Iterator<SAMRecord> iter) {
return new PrivateStringSAMIterator(iter);
}
public static StingSAMIterator adapt(Reads sourceInfo, CloseableIterator<SAMRecord> iter) {
return new PrivateStringSAMCloseableIterator(sourceInfo, iter);
public static StingSAMIterator adapt(CloseableIterator<SAMRecord> iter) {
return new PrivateStringSAMCloseableIterator(iter);
}
}
@ -52,20 +52,12 @@ public class StingSAMIteratorAdapter {
* methods that implement the iterable<> interface and the close() method from CloseableIterator
*/
class PrivateStringSAMIterator implements StingSAMIterator {
private Reads sourceInfo = null;
private Iterator<SAMRecord> iter = null;
PrivateStringSAMIterator(Reads sourceInfo, Iterator<SAMRecord> iter) {
this.sourceInfo = sourceInfo;
PrivateStringSAMIterator(Iterator<SAMRecord> iter) {
this.iter = iter;
}
public Reads getSourceInfo() {
if( sourceInfo == null )
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
return sourceInfo;
}
public void close() {
// do nothing, we can't close the iterator anyway.
}
@ -93,20 +85,12 @@ class PrivateStringSAMIterator implements StingSAMIterator {
* methods that implement the iterable<> interface.
*/
class PrivateStringSAMCloseableIterator implements StingSAMIterator {
private Reads sourceInfo = null;
private CloseableIterator<SAMRecord> iter = null;
PrivateStringSAMCloseableIterator(Reads sourceInfo, CloseableIterator<SAMRecord> iter) {
this.sourceInfo = sourceInfo;
PrivateStringSAMCloseableIterator(CloseableIterator<SAMRecord> iter) {
this.iter = iter;
}
public Reads getSourceInfo() {
if( sourceInfo == null )
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
return sourceInfo;
}
public void close() {
iter.close();
}

View File

@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.RuntimeIOException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import java.util.Iterator;
@ -24,15 +24,6 @@ public class VerifyingSamIterator implements StingSAMIterator {
this.it = it;
}
/**
* Retrieves information about reads sources.
* @return Info about the sources of reads.
*/
public Reads getSourceInfo() {
return it.getSourceInfo();
}
public boolean hasNext() { return this.it.hasNext(); }
public SAMRecord next() {

View File

@ -2,18 +2,15 @@ package org.broadinstitute.sting.gatk.traversals;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.Map;
import java.util.List;
import java.util.Iterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,ProviderType extends ShardDataProvider> {
// Time in milliseconds since we initialized this engine
@ -27,6 +24,12 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
/**
* Gets the named traversal type associated with the given traversal.
* @return A user-friendly name for the given traversal type.
*/
protected abstract String getTraversalType();
/**
* @param curTime (current runtime, in millisecs)
*
@ -39,23 +42,27 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
/**
* Forward request to printProgress
*
* @param type the TRAVERSAL_TYPE of the traversal
* @param shard the given shard currently being processed.
* @param loc the location
*/
public void printProgress(final String type, GenomeLoc loc) {
printProgress(false, type, loc);
public void printProgress(Shard shard,GenomeLoc loc) {
// A bypass is inserted here for unit testing.
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats.
ReadMetrics cumulativeMetrics = GenomeAnalysisEngine.instance != null ? GenomeAnalysisEngine.instance.getCumulativeMetrics().clone() : new ReadMetrics();
cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
printProgress(loc, cumulativeMetrics, false);
}
/**
* Utility routine that prints out process information (including timing) every N records or
* every M seconds, for N and M set in global variables.
*
* @param mustPrint If true, will print out info, regardless of nRecords or time interval
* @param type String to print out describing our atomic traversal type ("read", "locus", etc)
* @param loc Current location
* @param metrics Metrics of reads filtered in/out.
* @param mustPrint If true, will print out info, regardless of nRecords or time interval
*/
private void printProgress(boolean mustPrint, final String type, GenomeLoc loc) {
final long nRecords = TraversalStatistics.nRecords;
private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint) {
final long nRecords = metrics.getNumIterations();
final long curTime = System.currentTimeMillis();
final double elapsed = (curTime - startTime) / 1000.0;
//System.out.printf("Cur = %d, last print = %d, elapsed=%.2f, nRecords=%d, met=%b%n", curTime, lastProgressPrintTime, elapsed, nRecords, maxElapsedIntervalForPrinting(curTime));
@ -64,44 +71,35 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
this.lastProgressPrintTime = curTime;
final double secsPer1MReads = (elapsed * 1000000.0) / nRecords;
if (loc != null)
logger.info(String.format("[PROGRESS] Traversed to %s, processing %,d %s in %.2f secs (%.2f secs per 1M %s)", loc, nRecords, type, elapsed, secsPer1MReads, type));
logger.info(String.format("[PROGRESS] Traversed to %s, processing %,d %s in %.2f secs (%.2f secs per 1M %s)", loc, nRecords, getTraversalType(), elapsed, secsPer1MReads, getTraversalType()));
else
logger.info(String.format("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)", nRecords, type, elapsed, secsPer1MReads, type));
logger.info(String.format("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)", nRecords, getTraversalType(), elapsed, secsPer1MReads, getTraversalType()));
}
}
/**
* A passthrough method so that subclasses can report which types of traversals they're using.
*
* @param sum Result of the computation.
*/
public abstract void printOnTraversalDone(T sum);
/**
* Called after a traversal to print out information about the traversal process
*
* @param type describing this type of traversal
* @param sum The reduce result of the traversal
*/
protected void printOnTraversalDone(final String type, T sum) {
printProgress(true, type, null);
public void printOnTraversalDone(ReadMetrics cumulativeMetrics) {
printProgress(null, cumulativeMetrics, true);
final long curTime = System.currentTimeMillis();
final double elapsed = (curTime - startTime) / 1000.0;
// count up the number of skipped reads by summing over all filters
long nSkippedReads = 0L;
for ( long counts : TraversalStatistics.counter.values() )
nSkippedReads += counts;
for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet())
nSkippedReads += countsByFilter.getValue();
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours%n", elapsed, elapsed / 60, elapsed / 3600));
logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)",
nSkippedReads,
TraversalStatistics.nReads,
100.0 * MathUtils.ratio(nSkippedReads, TraversalStatistics.nReads)));
for ( Map.Entry<Class, Long> filterCounts : TraversalStatistics.counter.entrySet() ) {
cumulativeMetrics.getNumReadsSeen(),
100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen())));
for ( Map.Entry<Class, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) {
long count = filterCounts.getValue();
logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s",
count, 100.0 * MathUtils.ratio(count, TraversalStatistics.nReads), Utils.getClassName(filterCounts.getKey())));
count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), Utils.getClassName(filterCounts.getKey())));
}
}
@ -122,15 +120,4 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
public abstract T traverse(WalkerType walker,
ProviderType dataProvider,
T sum);
public static Iterator<SAMRecord> addMandatoryFilteringIterators(Iterator<SAMRecord> iter, List<SamRecordFilter> filters ) {
for( SamRecordFilter filter : filters ) {
//logger.debug("Adding filter " + filter.getClass());
iter = new CountingFilteringIterator(iter,filter);
}
return new CountingFilteringIterator(iter); // special case to count all reads
}
}

View File

@ -1,55 +0,0 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.picard.filter.SamRecordFilter;
import java.util.Map;
import java.util.HashMap;
import org.broadinstitute.sting.utils.Utils;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 8, 2009
* Time: 4:13:40 PM
*
* Holds a bunch of basic information about the traversal.
* TODO: Make this a class that can be passed around from the TraversalEngine to other entries that want to update it.
*/
public class TraversalStatistics {
// Number of records (loci, reads) we've processed
public static long nRecords;
// How many reads have we processed, along with those skipped for various reasons
public static long nReads;
public static long nSkippedReads;
public static long nUnmappedReads;
public static long nNotPrimary;
public static long nBadAlignments;
public static long nSkippedIndels;
public static long nDuplicates;
public static Map<Class, Long> counter = new HashMap<Class, Long>();
static {
reset();
}
public static void incrementFilter(SamRecordFilter filter) {
long c = 0;
if ( counter.containsKey(filter.getClass()) ) {
c = counter.get(filter.getClass());
}
counter.put(filter.getClass(), c + 1L);
}
public static void reset() {
nRecords = 0;
nReads = 0;
nSkippedReads = 0;
nUnmappedReads = 0;
nNotPrimary = 0;
nBadAlignments = 0;
nSkippedIndels = 0;
nDuplicates = 0;
}
}

View File

@ -25,9 +25,7 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
@ -52,12 +50,14 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseDuplicates.class);
/** descriptor of the type */
private static final String DUPS_STRING = "dups";
/** Turn this to true to enable logger.debug output */
private final boolean DEBUG = false;
@Override
protected String getTraversalType() {
return "dups";
}
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
@ -165,8 +165,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
public T traverse(DuplicateWalker<M, T> walker,
ReadShardDataProvider dataProvider,
T sum) {
Iterator<SAMRecord> filterIter = addMandatoryFilteringIterators(new ReadView(dataProvider).iterator(), walker.getMandatoryReadFilters());
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(filterIter);
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(new ReadView(dataProvider).iterator());
/**
* while we still have more reads:
@ -186,7 +185,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
AlignmentContext locus = new AlignmentContext(site, new ReadBackedPileupImpl(site));
// update the number of duplicate sets we've seen
TraversalStatistics.nRecords++;
dataProvider.getShard().getReadMetrics().incrementNumIterations();
// actually call filter and map, accumulating sum
final boolean keepMeP = walker.filter(site, locus, readSets);
@ -195,18 +194,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
sum = walker.reduce(x, sum);
}
printProgress(DUPS_STRING, site);
printProgress(dataProvider.getShard(),site);
}
return sum;
}
/**
* Temporary override of printOnTraversalDone.
*
* @param sum Result of the computation.
*/
public void printOnTraversalDone(T sum) {
printOnTraversalDone(DUPS_STRING, sum);
}
}

View File

@ -17,17 +17,20 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
* A simple solution to iterating over all reference positions over a series of genomic locations.
*/
public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
final private static String LOCI_STRING = "sites";
/**
* our log, which we want to capture anything from this class
*/
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
@Override
protected String getTraversalType() {
return "sites";
}
@Override
public T traverse( LocusWalker<M,T> walker,
LocusShardDataProvider dataProvider,
T sum ) {
T sum) {
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
LocusView locusView = getLocusView( walker, dataProvider );
@ -48,7 +51,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
AlignmentContext locus = locusView.next();
GenomeLoc location = locus.getLocation();
TraversalStatistics.nRecords++;
dataProvider.getShard().getReadMetrics().incrementNumIterations();
if ( locus.hasExtendedEventPileup() ) {
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
@ -76,7 +79,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
sum = walker.reduce(x, sum);
}
printProgress(LOCI_STRING, locus.getLocation());
printProgress(dataProvider.getShard(),locus.getLocation());
}
}
@ -96,15 +99,6 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
return sum;
}
/**
* Temporary override of printOnTraversalDone.
*
* @param sum Result of the computation.
*/
public void printOnTraversalDone( T sum ) {
printOnTraversalDone(LOCI_STRING, sum );
}
/**
* Gets the best view of loci for this walker given the available data.
* @param walker walker to interrogate.

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.apache.log4j.Logger;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordCoordinateComparator;
@ -24,8 +24,10 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReadPairs.class);
/** descriptor of the type */
private static final String PAIRS_STRING = "read pairs";
@Override
protected String getTraversalType() {
return "read pairs";
}
/**
* Traverse by reads, given the data and the walker
@ -47,7 +49,7 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
List<SAMRecord> pairs = new ArrayList<SAMRecord>();
for(SAMRecord read: reads) {
TraversalStatistics.nReads++;
dataProvider.getShard().getReadMetrics().incrementNumReadsSeen();
if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) {
// If this read name is the same as the last, accumulate it.
@ -55,17 +57,17 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
}
else {
// Otherwise, walk over the accumulated list, then start fresh with the new read.
sum = walkOverPairs(walker,pairs,sum);
sum = walkOverPairs(walker,dataProvider.getShard(),pairs,sum);
pairs.clear();
pairs.add(read);
printProgress(PAIRS_STRING, null);
printProgress(dataProvider.getShard(),null);
}
}
// If any data was left in the queue, process it.
if(pairs.size() > 0)
sum = walkOverPairs(walker,pairs,sum);
sum = walkOverPairs(walker,dataProvider.getShard(),pairs,sum);
return sum;
}
@ -73,13 +75,14 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
/**
* Filter / map / reduce over a single pair.
* @param walker The walker.
* @param shard The shard currently being processed.
* @param reads The reads in the pair.
* @param sum The accumulator.
* @return The accumulator after application of the given read pairing.
*/
private T walkOverPairs(ReadPairWalker<M,T> walker, List<SAMRecord> reads, T sum) {
private T walkOverPairs(ReadPairWalker<M,T> walker, Shard shard, List<SAMRecord> reads, T sum) {
// update the number of reads we've seen
TraversalStatistics.nRecords++;
shard.getReadMetrics().incrementNumIterations();
// Sort the reads present in coordinate order.
Collections.sort(reads,new SAMRecordCoordinateComparator());
@ -92,13 +95,4 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
return sum;
}
/**
* Temporary override of printOnTraversalDone.
*
* @param sum Result of the computation.
*/
public void printOnTraversalDone(T sum) {
printOnTraversalDone(PAIRS_STRING, sum);
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
@ -49,8 +50,10 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReads.class);
/** descriptor of the type */
private static final String READS_STRING = "reads";
@Override
protected String getTraversalType() {
return "reads";
}
/**
* Traverse by reads, given the data and the walker
@ -87,8 +90,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
refContext = reference.getReferenceContext(read);
// update the number of reads we've seen
TraversalStatistics.nRecords++;
TraversalStatistics.nReads++;
ReadMetrics readMetrics = dataProvider.getShard().getReadMetrics();
readMetrics.incrementNumIterations();
readMetrics.incrementNumReadsSeen();
// if the read is mapped, create a metadata tracker
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
@ -99,20 +103,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
sum = walker.reduce(x, sum);
}
printProgress(READS_STRING,
(read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) ?
null :
GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()));
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart());
printProgress(dataProvider.getShard(),locus);
}
return sum;
}
/**
* Temporary override of printOnTraversalDone.
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
* @param sum Result of the computation.
*/
public void printOnTraversalDone( T sum ) {
printOnTraversalDone(READS_STRING, sum );
}
}

View File

@ -1,15 +1,12 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
import java.util.Set;
import java.util.ArrayList;
import java.util.Arrays;
import net.sf.samtools.SAMRecord;
@ -23,6 +20,7 @@ import net.sf.picard.filter.SamRecordFilter;
* To change this template use File | Settings | File Templates.
*/
@Requires({DataSource.READS,DataSource.REFERENCE})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
@ -34,20 +32,4 @@ public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapTyp
// Given result of map function
public abstract ReduceType reduceInit();
public abstract ReduceType reduce(MapType value, ReduceType sum);
// --------------------------------------------------------------------------------------------------------------
//
// read filters
//
// --------------------------------------------------------------------------------------------------------------
public List<SamRecordFilter> getMandatoryReadFilters() {
SamRecordFilter filter1 = new UnmappedReadFilter();
SamRecordFilter filter2 = new NotPrimaryAlignmentReadFilter();
List<SamRecordFilter> x = super.getMandatoryReadFilters();
x.addAll(Arrays.asList(filter2, filter1));
return x;
}
}

View File

@ -4,15 +4,11 @@ import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorFilter;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Arrays;
import java.util.EnumSet;
/**
* Created by IntelliJ IDEA.
@ -23,6 +19,7 @@ import java.util.EnumSet;
*/
@By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class})
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -32,28 +29,6 @@ public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, R
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
// --------------------------------------------------------------------------------------------------------------
//
// mandatory read filters
//
// --------------------------------------------------------------------------------------------------------------
public List<SamRecordFilter> getMandatoryReadFilters() {
// if ( false ) {
// SamRecordFilter filter = new LocusStreamFilterFunc();
// return Arrays.asList(filter);
// } else {
SamRecordFilter filter1 = new UnmappedReadFilter();
SamRecordFilter filter2 = new NotPrimaryAlignmentReadFilter();
SamRecordFilter filter3 = new DuplicateReadFilter();
SamRecordFilter filter4 = new FailsVendorQualityCheckReadFilter();
List<SamRecordFilter> x = super.getMandatoryReadFilters();
x.addAll(Arrays.asList(filter4, filter3, filter2, filter1));
// }
return x;
}
/**
* Returns the set of locus iterator discards that this walker wants the engine to discard automatically
*

View File

@ -30,6 +30,7 @@ import java.util.List;
import java.util.ArrayList;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.apache.log4j.Logger;
@ -42,6 +43,7 @@ import net.sf.picard.filter.SamRecordFilter;
* Time: 1:53:31 PM
* To change this template use File | Settings | File Templates.
*/
@ReadFilters(MalformedReadFilter.class)
public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class);
@ -135,15 +137,6 @@ public abstract class Walker<MapType, ReduceType> {
out.println("[REDUCE RESULT] Traversal result is: " + result);
}
/**
* Returns a list of SamRecordFilters that *must* be applied to the read stream for the traversal to work
* @return a list of SamRecordFilters to apply in order
*/
public List<SamRecordFilter> getMandatoryReadFilters() {
return new ArrayList<SamRecordFilter>(); // by default
}
/**
* General interval reduce routine called after all of the traversals are done
* @param results

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.utils.sam;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
@ -71,6 +70,11 @@ public class ArtificialReadsTraversal<M,T> extends TraversalEngine<M,T,Walker<M,
readOrder = readOrdering;
}
@Override
protected String getTraversalType() {
return "reads";
}
/**
* Traverse by reads, given the data and the walker
*
@ -81,8 +85,8 @@ public class ArtificialReadsTraversal<M,T> extends TraversalEngine<M,T,Walker<M,
* @return the reduce variable of the read walker
*/
public T traverse( Walker<M, T> walker,
ShardDataProvider dataProvider,
T sum ) {
ShardDataProvider dataProvider,
T sum ) {
if (!( walker instanceof ReadWalker ))
throw new IllegalArgumentException("Walker isn't a read walker!");
@ -99,35 +103,15 @@ public class ArtificialReadsTraversal<M,T> extends TraversalEngine<M,T,Walker<M,
// while we still have more reads
for (SAMRecord read : iter) {
// our alignment context
AlignmentContext alignment = null;
// an array of characters that represent the reference
ReferenceContext refSeq = null;
// update the number of reads we've seen
TraversalStatistics.nRecords++;
final boolean keepMeP = readWalker.filter(refSeq, read);
if (keepMeP) {
M x = readWalker.map(refSeq, read, null); // TODO: fix me at some point, it would be nice to fake out ROD data too
sum = readWalker.reduce(x, sum);
}
if (alignment != null) { printProgress("reads", alignment.getLocation()); }
}
return sum;
}
/**
* Temporary override of printOnTraversalDone.
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
*
* @param sum Result of the computation.
*/
public void printOnTraversalDone( T sum ) {
printOnTraversalDone("reads", sum);
}
}

View File

@ -4,19 +4,15 @@ import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordIterator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.util.CloseableIterator;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.io.File;
import java.util.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.gatk.Reads;
/**
* User: hanna
* Date: Jun 11, 2009

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.utils.sam;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
@ -105,11 +105,6 @@ public class ArtificialSAMIterator implements StingSAMIterator {
reset();
}
public Reads getSourceInfo() {
throw new UnsupportedOperationException("We don't support this");
}
public void close() {
open = false;
}

View File

@ -1,52 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import org.broadinstitute.sting.utils.StingException;
/**
* Represents a validation failure, usually triggered by an inconsistency internal to the read.
* @author hanna
* @version 0.1
*/
public class SAMReadValidationException extends StingException {
/**
* Create a validation exception with only a message; no other traceback info is provided.
* @param message The message to pass along to the user.
*/
public SAMReadValidationException(String message) {
super(message);
}
/**
* Create a validation exception with a message and traceback info.
* @param message The message to pass along to the user.
* @param inner The exception to nest.
*/
public SAMReadValidationException(String message,Throwable inner) {
super(message,inner);
}
}

View File

@ -1,69 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import java.util.*;
/**
* Collects a series of violations to our SAM read validation criteria.
*
* @author hanna
* @version 0.1
*/
public class SAMReadViolationHistogram {
    /** Occurrence count for each distinct violation message seen so far. */
    private Map<String,Long> violations = new HashMap<String,Long>();

    /**
     * Record one occurrence of the given violation.  For now, only the
     * number of occurrences of each distinct message is tracked.
     * @param violation Violation to add, generated by the SAMReadValidator.
     */
    public void addViolation( SAMReadValidationException violation ) {
        String message = violation.getMessage();
        Long priorCount = violations.get(message);
        // A single put covers both the first-seen and increment cases.
        violations.put(message, priorCount == null ? 1L : priorCount + 1L);
    }

    /**
     * @return Total number of violations recorded, summed over all messages.
     */
    public long getViolationCount() {
        long total = 0L;
        for( Map.Entry<String,Long> entry: violations.entrySet() )
            total += entry.getValue();
        return total;
    }

    /**
     * Renders a human-readable summary of the recorded violations;
     * returns the empty string when nothing has been recorded.
     */
    public String toString() {
        if( getViolationCount() == 0 )
            return "";
        StringBuilder summary = new StringBuilder();
        summary.append("Eliminated malformed reads for the following reasons:\n");
        for(Map.Entry<String,Long> violation: violations.entrySet())
            summary.append( String.format("\t%s: %d%n", violation.getKey(), violation.getValue()) );
        return summary.toString();
    }
}

View File

@ -5,12 +5,13 @@ import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -50,8 +51,8 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator();
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.<File>emptyList())),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null);
@ -67,7 +68,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new MockLocusShard(Collections.singletonList(shardBounds));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
@ -82,7 +83,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -96,7 +97,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -110,7 +111,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -124,7 +125,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -138,7 +139,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -153,7 +154,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -172,7 +173,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -191,7 +192,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -212,7 +213,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -240,7 +241,7 @@ public abstract class LocusViewTemplate extends BaseTest {
read07, read08, read09, read10, read11, read12);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList<SamRecordFilter>(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -336,11 +337,6 @@ public abstract class LocusViewTemplate extends BaseTest {
backingIterator = backingList.iterator();
}
public Reads getSourceInfo() {
// There are no sources for these reads.
return new Reads(new ArrayList<File>());
}
public boolean hasNext() {
return backingIterator.hasNext();
}

View File

@ -25,8 +25,12 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import java.util.List;
import java.util.Collections;
import java.io.File;
/**
* A mock locus shard, usable for infrastructure that requires a shard to behave properly.
@ -36,6 +40,6 @@ import java.util.List;
*/
public class MockLocusShard extends LocusShard {
public MockLocusShard(final List<GenomeLoc> intervals) {
super(intervals,null);
super(new SAMDataSource(new ReadProperties(Collections.<File>emptyList())),intervals,null);
}
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import static junit.framework.Assert.fail;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
@ -9,7 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.junit.After;
@ -85,7 +84,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
// setup the data
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"));
Reads reads = new Reads(fl);
ReadProperties reads = new ReadProperties(fl);
// the sharding strat.
SAMDataSource data = new SAMDataSource(reads);
@ -130,7 +129,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
// setup the test files
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"));
Reads reads = new Reads(fl);
ReadProperties reads = new ReadProperties(fl);
// the sharding strat.
SAMDataSource data = new SAMDataSource(reads);
@ -172,7 +171,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
fl.clear();
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"));
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"));
reads = new Reads(fl);
reads = new ReadProperties(fl);
count = 0;
// the sharding strat.

View File

@ -7,7 +7,7 @@ import net.sf.samtools.SAMRecord;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
@ -101,15 +101,6 @@ class testIterator implements StingSAMIterator {
testIterator() {
header = ArtificialSAMUtils.createArtificialSamHeader(1,1,2000);
}
/**
* Gets source information for the reads. Contains information about the original reads
* files, plus information about downsampling, etc.
*
* @return
*/
public Reads getSourceInfo() {
return null;
}
public void close() {

View File

@ -1,20 +1,16 @@
package org.broadinstitute.sting.gatk.iterators;
import junit.framework.Assert;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@ -44,8 +40,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// create a test version of the Reads object
Reads readAttributes = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
ReadProperties readAttributes = new ReadProperties(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
before.setReadBases(bases);
@ -96,8 +92,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
// create a test version of the Reads object
Reads readAttributes = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
ReadProperties readAttributes = new ReadProperties(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
before.setReadBases(bases);
@ -144,8 +140,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20));
// create a test version of the Reads object
Reads reads = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
ReadProperties reads = new ReadProperties(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
@ -170,8 +166,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20));
// create a test version of the Reads object
Reads reads = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
ReadProperties reads = new ReadProperties(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);

View File

@ -97,7 +97,7 @@ public class StingSAMIteratorAdapterUnitTest extends BaseTest {
final int COUNT = 100;
MyTestIterator it = new MyTestIterator();
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
int countCheck = 0;
while (samIt.hasNext()) {
samIt.next();
@ -116,7 +116,7 @@ public class StingSAMIteratorAdapterUnitTest extends BaseTest {
MyTestCloseableIterator it = new MyTestCloseableIterator();
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
int countCheck = 0;
while (samIt.hasNext()) {
@ -133,7 +133,7 @@ public class StingSAMIteratorAdapterUnitTest extends BaseTest {
MyTestCloseableIterator it = new MyTestCloseableIterator();
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
int countCheck = 0;

View File

@ -3,7 +3,8 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
@ -109,7 +110,7 @@ public class TraverseReadsUnitTest extends BaseTest {
ref = new IndexedFastaSequenceFile(refFile);
GenomeLocParser.setupRefContigOrdering(ref);
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
SAMDataSource dataSource = new SAMDataSource(new ReadProperties(bamList));
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
ref.getSequenceDictionary(),
readSize);
@ -129,7 +130,7 @@ public class TraverseReadsUnitTest extends BaseTest {
dataProvider.close();
}
traversalEngine.printOnTraversalDone("reads", accumulator);
traversalEngine.printOnTraversalDone(new ReadMetrics());
countReadWalker.onTraversalDone(accumulator);
if (!(accumulator instanceof Integer)) {