From 41d57b713998217852a35901ffa0db7ba1a298ef Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 11 Aug 2010 20:17:11 +0000 Subject: [PATCH] Massive cleanup of read filtering. - Eliminate redundancy of filter application. - Track filter metrics per-shard to facilitate merging. - Flatten counting iterator hierarchy for easier debugging. - Rename Reads class to ReadProperties and track it outside of the Sting iterators. Note: because shards are currently tied so closely to reads and not the merged triplet of data sources (reads, reference, RODs), the metrics classes are managed by the SAMDataSource when they should be managed by something more general. For now, we're hacking the reads data source to manage the metrics; in the future, something more general should manage the metrics classes. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4015 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 26 +- .../sting/gatk/ReadMetrics.java | 226 ++++++++++++++++++ .../gatk/{Reads.java => ReadProperties.java} | 6 +- .../sting/gatk/WalkerManager.java | 15 +- .../providers/LocusShardDataProvider.java | 8 +- .../gatk/datasources/providers/LocusView.java | 4 +- .../providers/ShardDataProvider.java | 3 + .../gatk/datasources/shards/LocusShard.java | 42 +++- .../shards/LocusShardStrategy.java | 9 +- .../datasources/shards/MonolithicShard.java | 49 +++- .../shards/MonolithicShardStrategy.java | 6 +- .../gatk/datasources/shards/ReadShard.java | 48 +++- .../datasources/shards/ReadShardStrategy.java | 4 +- .../sting/gatk/datasources/shards/Shard.java | 19 ++ .../simpleDataSources/SAMDataSource.java | 98 ++++---- .../executive/HierarchicalMicroScheduler.java | 3 +- .../gatk/executive/LinearMicroScheduler.java | 7 +- .../sting/gatk/executive/MicroScheduler.java | 18 +- .../sting/gatk/executive/ShardTraverser.java | 5 +- .../sting/gatk/executive/WindowMaker.java | 24 +- .../filters/CountingFilteringIterator.java | 42 ++-- .../filters/FailsVendorQualityCheckReadFilter.java | 1 - 
.../filters/MalformedReadFilter.java} | 69 +++--- .../NotPrimaryAlignmentReadFilter.java | 1 - .../gatk/filters/UnmappedReadFilter.java | 1 - .../gatk/iterators/BoundedReadIterator.java | 11 +- .../gatk/iterators/DownsampleIterator.java | 11 +- .../gatk/iterators/LocusIteratorByState.java | 8 +- .../MalformedSAMFilteringIterator.java | 141 ----------- .../sting/gatk/iterators/NullSAMIterator.java | 7 +- .../iterators/PositionTrackingIterator.java | 18 +- .../iterators/ReadFormattingIterator.java | 10 +- .../gatk/iterators/StingSAMIterator.java | 8 +- .../iterators/StingSAMIteratorAdapter.java | 30 +-- .../gatk/iterators/VerifyingSamIterator.java | 11 +- .../gatk/traversals/TraversalEngine.java | 75 +++--- .../gatk/traversals/TraversalStatistics.java | 55 ----- .../gatk/traversals/TraverseDuplicates.java | 26 +- .../sting/gatk/traversals/TraverseLoci.java | 22 +- .../gatk/traversals/TraverseReadPairs.java | 30 +-- .../sting/gatk/traversals/TraverseReads.java | 27 +-- .../sting/gatk/walkers/DuplicateWalker.java | 20 +- .../sting/gatk/walkers/LocusWalker.java | 27 +-- .../sting/gatk/walkers/Walker.java | 11 +- .../utils/sam/ArtificialReadsTraversal.java | 30 +-- .../utils/sam/ArtificialSAMFileReader.java | 4 - .../utils/sam/ArtificialSAMIterator.java | 7 +- .../utils/sam/SAMReadValidationException.java | 52 ---- .../utils/sam/SAMReadViolationHistogram.java | 69 ------ .../providers/LocusViewTemplate.java | 34 ++- .../datasources/shards/MockLocusShard.java | 6 +- .../SAMBAMDataSourceUnitTest.java | 9 +- .../BoundedReadIteratorUnitTest.java | 11 +- .../LocusIteratorByStateUnitTest.java | 22 +- .../StingSAMIteratorAdapterUnitTest.java | 6 +- .../traversals/TraverseReadsUnitTest.java | 7 +- 56 files changed, 689 insertions(+), 850 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/ReadMetrics.java rename java/src/org/broadinstitute/sting/gatk/{Reads.java => ReadProperties.java} (98%) rename 
java/src/org/broadinstitute/sting/{utils/sam/SAMReadValidator.java => gatk/filters/MalformedReadFilter.java} (58%) delete mode 100644 java/src/org/broadinstitute/sting/gatk/iterators/MalformedSAMFilteringIterator.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/traversals/TraversalStatistics.java delete mode 100644 java/src/org/broadinstitute/sting/utils/sam/SAMReadValidationException.java delete mode 100644 java/src/org/broadinstitute/sting/utils/sam/SAMReadViolationHistogram.java diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 6d5ee4d7a..f38ebdd77 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -523,7 +523,7 @@ public class GenomeAnalysisEngine { * @param argCollection The collection of arguments passed to the engine. * @return The reads object providing reads source info. 
*/ - private Reads extractSourceInfo(Walker walker, Collection filters, GATKArgumentCollection argCollection) { + private ReadProperties extractSourceInfo(Walker walker, Collection filters, GATKArgumentCollection argCollection) { DownsamplingMethod method = null; if(argCollection.downsamplingType != DownsampleType.NONE) @@ -533,7 +533,7 @@ public class GenomeAnalysisEngine { else method = new DownsamplingMethod(DownsampleType.NONE,null,null); - return new Reads(argCollection.samFiles, + return new ReadProperties(argCollection.samFiles, argCollection.strictnessLevel, argCollection.readBufferSize, method, @@ -612,7 +612,7 @@ public class GenomeAnalysisEngine { * @param tracks a collection of the reference ordered data tracks */ private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection tracks) { - if ((reads == null && (tracks == null || tracks.isEmpty())) || reference == null ) + if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null ) return; // Compile a set of sequence names that exist in the reference file. @@ -623,7 +623,7 @@ public class GenomeAnalysisEngine { referenceSequenceNames.add(dictionaryEntry.getSequenceName()); - if (reads != null) { + if (!reads.isEmpty()) { // Compile a set of sequence names that exist in the BAM files. 
SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary(); @@ -752,7 +752,7 @@ public class GenomeAnalysisEngine { region.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength())); } - return new MonolithicShardStrategy(shardType,region); + return new MonolithicShardStrategy(readsDataSource,shardType,region); } ShardStrategy shardStrategy = null; @@ -764,7 +764,7 @@ public class GenomeAnalysisEngine { if (walker instanceof RodWalker) SHARD_SIZE *= 1000; if (intervals != null && !intervals.isEmpty()) { - if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) + if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) Utils.scareUser("Locus walkers can only walk over coordinate-sorted data. Please resort your input BAM file."); shardStrategy = ShardStrategyFactory.shatter(readsDataSource, @@ -820,11 +820,7 @@ public class GenomeAnalysisEngine { * @param reads the read source information * @return A data source for the given set of reads. */ - private SAMDataSource createReadsDataSource(Reads reads) { - // By reference traversals are happy with no reads. Make sure that case is handled. - if (reads.getReadsFiles().size() == 0) - return null; - + private SAMDataSource createReadsDataSource(ReadProperties reads) { return new SAMDataSource(reads); } @@ -934,4 +930,12 @@ public class GenomeAnalysisEngine { public List getRodDataSources() { return this.rodDataSources; } + + /** + * Gets cumulative metrics about the entire run to this point. + * @return cumulative metrics about the entire run. 
+ */ + public ReadMetrics getCumulativeMetrics() { + return readsDataSource.getCumulativeReadMetrics(); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java new file mode 100755 index 000000000..fbdca4c13 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk; + +import net.sf.picard.filter.SamRecordFilter; + +import java.util.Map; +import java.util.HashMap; +import java.util.Collections; + +import org.broadinstitute.sting.utils.StingException; + +/** + * Holds a bunch of basic information about the traversal. 
+ */ +public class ReadMetrics implements Cloneable { + // Number of records (loci, reads) we've processed + private long nRecords; + // How many reads have we processed, along with those skipped for various reasons + private long nReads; + private long nSkippedReads; + private long nUnmappedReads; + private long nNotPrimary; + private long nBadAlignments; + private long nSkippedIndels; + private long nDuplicates; + private Map counter = new HashMap(); + + /** + * Combines these metrics with a set of other metrics, storing the results in this class. + * @param metrics The metrics to fold into this class. + */ + public void incrementMetrics(ReadMetrics metrics) { + nRecords += metrics.nRecords; + nReads += metrics.nReads; + nSkippedReads += metrics.nSkippedReads; + nUnmappedReads += metrics.nUnmappedReads; + nNotPrimary += metrics.nNotPrimary; + nBadAlignments += metrics.nBadAlignments; + nSkippedIndels += metrics.nSkippedIndels; + nDuplicates += metrics.nDuplicates; + for(Map.Entry counterEntry: metrics.counter.entrySet()) { + Class counterType = counterEntry.getKey(); + long newValue = (counter.containsKey(counterType) ? counter.get(counterType) : 0) + counterEntry.getValue(); + counter.put(counterType,newValue); + } + } + + /** + * Create a copy of the given read metrics. 
+ * @return + */ + public ReadMetrics clone() { + ReadMetrics newMetrics; + try { + newMetrics = (ReadMetrics)super.clone(); + } + catch(CloneNotSupportedException ex) { + throw new StingException("Unable to clone runtime metrics",ex); + } + newMetrics.nRecords = nRecords; + newMetrics.nReads = nReads; + newMetrics.nSkippedReads = nSkippedReads; + newMetrics.nUnmappedReads = nUnmappedReads; + newMetrics.nNotPrimary = nNotPrimary; + newMetrics.nBadAlignments = nBadAlignments; + newMetrics.nSkippedIndels = nSkippedIndels; + newMetrics.nDuplicates = nDuplicates; + newMetrics.counter = new HashMap(counter); + + return newMetrics; + } + + + public void incrementFilter(SamRecordFilter filter) { + long c = 0; + if ( counter.containsKey(filter.getClass()) ) { + c = counter.get(filter.getClass()); + } + + counter.put(filter.getClass(), c + 1L); + } + + public Map getCountsByFilter() { + return Collections.unmodifiableMap(counter); + } + + /** + * Gets the number of 'iterations' (one call of filter/map/reduce sequence) performed. + * @return The number of iterations completed. + */ + public long getNumIterations() { + return nRecords; + } + + /** + * Increments the number of 'iterations' (one call of filter/map/reduce sequence) completed. + */ + public void incrementNumIterations() { + nRecords++; + } + + public long getNumReadsSeen() { + return nReads; + } + + /** + * Increments the number of reads seen in the course of this run. + */ + public void incrementNumReadsSeen() { + nReads++; + } + + /** + * Gets the cumulative number of reads skipped in the course of this run. + * @return Cumulative number of reads skipped in the course of this run. + */ + public long getNumSkippedReads() { + return nSkippedReads; + } + + /** + * Increments the cumulative number of reads skipped in the course of this run. + */ + public void incrementNumSkippedReads() { + nSkippedReads++; + } + + /** + * Gets the number of unmapped reads skipped in the course of this run. 
+ * @return The number of unmapped reads skipped. + */ + public long getNumUnmappedReads() { + return nUnmappedReads; + } + + /** + * Increments the number of unmapped reads skipped in the course of this run. + */ + public void incrementNumUnmappedReads() { + nUnmappedReads++; + } + + /** + * + * @return + */ + public long getNumNonPrimaryReads() { + return nNotPrimary; + } + + /** + * + */ + public void incrementNumNonPrimaryReads() { + nNotPrimary++; + } + + /** + * + * @return + */ + public long getNumBadAlignments() { + return nBadAlignments; + } + + /** + * + */ + public void incrementNumBadAlignments() { + nBadAlignments++; + } + + /** + * + * @return + */ + public long getNumSkippedIndels() { + return nSkippedIndels; + } + + /** + * + */ + public void incrementNumSkippedIndels() { + nSkippedIndels++; + } + + /** + * + * @return + */ + public long getNumDuplicates() { + return nDuplicates; + } + + /** + * + */ + public void incrementNumDuplicates() { + nDuplicates++; + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java similarity index 98% rename from java/src/org/broadinstitute/sting/gatk/Reads.java rename to java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 8affdf3c9..d92053bfb 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -25,7 +25,7 @@ import java.util.Collection; * A data structure containing information about the reads data sources as well as * information about how they should be downsampled, sorted, and filtered. */ -public class Reads { +public class ReadProperties { private List readsFiles = null; private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private Integer readBufferSize = null; @@ -115,7 +115,7 @@ public class Reads { * Simple constructor for unit testing. * @param readsFiles List of reads files to open. 
*/ - public Reads( List readsFiles ) { + public ReadProperties( List readsFiles ) { this.readsFiles = readsFiles; this.downsamplingMethod = new DownsamplingMethod(DownsampleType.NONE,null,null); this.supplementalFilters = new ArrayList(); @@ -138,7 +138,7 @@ public class Reads { * will explicitly list reads with deletion over the current reference base; otherwise, only observed * bases will be seen in the pileups, and the deletions will be skipped silently. */ - Reads( List samFiles, + ReadProperties( List samFiles, SAMFileReader.ValidationStringency strictness, Integer readBufferSize, DownsamplingMethod downsamplingMethod, diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index d8d4a7861..066aebbd1 100755 --- a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -435,11 +435,14 @@ public class WalkerManager extends PluginManager { * @param walkerClass Class of the walker to inspect. * @return An array of types extending from SamRecordFilter. Will never be null. */ - @SuppressWarnings("unchecked") - public static Class[] getReadFilterTypes(Class walkerClass) { - if( !walkerClass.isAnnotationPresent(ReadFilters.class) ) - return new Class[0]; - return walkerClass.getAnnotation(ReadFilters.class).value(); + public static Collection> getReadFilterTypes(Class walkerClass) { + Set> filterTypes = new HashSet>(); + while(walkerClass != null) { + if(walkerClass.isAnnotationPresent(ReadFilters.class)) + filterTypes.addAll(Arrays.asList(walkerClass.getAnnotation(ReadFilters.class).value())); + walkerClass = walkerClass.getSuperclass(); + } + return filterTypes; } /** @@ -447,7 +450,7 @@ public class WalkerManager extends PluginManager { * @param walker The walker to inspect. * @return An array of types extending from SamRecordFilter. Will never be null. 
*/ - public static Class[] getReadFilterTypes(Walker walker) { + public static Collection> getReadFilterTypes(Walker walker) { return getReadFilterTypes(walker.getClass()); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 0d728a668..0fad873ce 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.iterators.LocusIterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import java.util.Collection; @@ -20,7 +20,7 @@ public class LocusShardDataProvider extends ShardDataProvider { /** * Information about the source of the read data. */ - private final Reads sourceInfo; + private final ReadProperties sourceInfo; /** * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs(). @@ -37,7 +37,7 @@ public class LocusShardDataProvider extends ShardDataProvider { * @param shard The chunk of data over which traversals happen. * @param reference A getter for a section of the reference. 
*/ - public LocusShardDataProvider(Shard shard, Reads sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { + public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { super(shard,reference,rods); this.sourceInfo = sourceInfo; this.locus = locus; @@ -48,7 +48,7 @@ public class LocusShardDataProvider extends ShardDataProvider { * Returns information about the source of the reads. * @return Info about the source of the reads. */ - public Reads getSourceInfo() { + public ReadProperties getSourceInfo() { return sourceInfo; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 886e4afab..2f6a2df88 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.LocusIterator; @@ -36,7 +36,7 @@ public abstract class LocusView extends LocusIterator implements View { /** * Source info for this view. Informs the class about downsampling requirements. */ - private Reads sourceInfo; + private ReadProperties sourceInfo; /** * The actual locus context iterator. 
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index d0c9914a7..e48849f9b 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -139,6 +139,9 @@ public abstract class ShardDataProvider { // Explicitly purge registered views to ensure that we don't end up with circular references // to views, which can in turn hold state. registeredViews.clear(); + + if(shard != null) + shard.close(); } @Override diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java index 9d79fa7e5..46f62877e 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java @@ -3,7 +3,10 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.ReadProperties; import java.util.List; import java.util.Map; @@ -19,6 +22,11 @@ import net.sf.picard.filter.SamRecordFilter; * @date Apr 7, 2009 */ public class LocusShard implements BAMFormatAwareShard { + /** + * Source for read data. + */ + private SAMDataSource dataSource; + /** * A list of the chunks associated with this shard. 
*/ @@ -27,16 +35,30 @@ public class LocusShard implements BAMFormatAwareShard { // currently our location private final List loci; + /** + * Statistics about which reads in this shards were used and which were filtered away. + */ + private final ReadMetrics readMetrics = new ReadMetrics(); + /** * Create a new locus shard, divided by index. * @param intervals List of intervals to process. * @param fileSpans File spans associated with that interval. */ - public LocusShard(List intervals, Map fileSpans) { + public LocusShard(SAMDataSource dataSource, List intervals, Map fileSpans) { + this.dataSource = dataSource; this.loci = intervals; this.fileSpans = fileSpans; } + /** + * Closes the shard, tallying and incorporating read data. + */ + @Override + public void close() { + dataSource.incorporateReadMetrics(readMetrics); + } + /** * Gets the file spans associated with this locus shard. * @return A list of the file spans to use when retrieving locus data. @@ -104,6 +126,24 @@ public class LocusShard implements BAMFormatAwareShard { return ShardType.LOCUS; } + /** + * Gets key read validation and filtering properties. + * @return set of read properties associated with this shard. + */ + @Override + public ReadProperties getReadProperties() { + return dataSource.getReadsInfo(); + } + + /** + * Retrieves a storage space of metrics about number of reads included, filtered, etc. + * @return Storage space for metrics. + */ + @Override + public ReadMetrics getReadMetrics() { + return readMetrics; + } + /** * String representation of this shard. * @return A string representation of the boundaries of this shard. 
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java index 664b2886c..f80384139 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java @@ -29,8 +29,10 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.ReadProperties; import java.util.*; +import java.io.File; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceRecord; @@ -57,7 +59,8 @@ public class LocusShardStrategy implements ShardStrategy { * @param locations List of locations for which to load data. */ LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocSortedSet locations) { - if(reads != null) { + this.reads = reads; + if(!reads.isEmpty()) { List intervals; if(locations == null) { // If no locations were passed in, shard the entire BAM file. @@ -77,12 +80,10 @@ public class LocusShardStrategy implements ShardStrategy { else intervals = locations.toList(); - this.reads = reads; this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals); } else { final int maxShardSize = 100000; - this.reads = null; List filePointers = new ArrayList(); if(locations == null) { for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) { @@ -118,7 +119,7 @@ public class LocusShardStrategy implements ShardStrategy { public LocusShard next() { FilePointer nextFilePointer = filePointerIterator.next(); Map fileSpansBounding = nextFilePointer.fileSpans != null ? 
nextFilePointer.fileSpans : null; - return new LocusShard(nextFilePointer.locations,fileSpansBounding); + return new LocusShard(reads,nextFilePointer.locations,fileSpansBounding); } /** we don't support the remove command */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java index ff5c10220..0a971df2e 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java @@ -2,14 +2,11 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import java.util.Collections; import java.util.List; -import java.util.ArrayList; - -import net.sf.samtools.SAMSequenceDictionary; -import net.sf.samtools.SAMSequenceRecord; /** * A single, monolithic shard bridging all available data. @@ -17,6 +14,11 @@ import net.sf.samtools.SAMSequenceRecord; * @version 0.1 */ public class MonolithicShard implements Shard { + /** + * Reads data, if applicable. + */ + private final SAMDataSource readsDataSource; + /** * What type of MonolithicShard is this? Read or locus? */ @@ -27,18 +29,32 @@ public class MonolithicShard implements Shard { */ private final List locs; + /** + * Statistics about which reads in this shards were used and which were filtered away. + */ + private final ReadMetrics readMetrics = new ReadMetrics(); + /** * Creates a new monolithic shard of the given type. * @param shardType Type of the shard. Must be either read or locus; cannot be intervalic. 
* @param locs Intervals that this monolithic shard should process. */ - public MonolithicShard(ShardType shardType, List locs) { + public MonolithicShard(SAMDataSource readsDataSource, ShardType shardType, List locs) { + this.readsDataSource = readsDataSource; if(shardType != ShardType.LOCUS && shardType != ShardType.READ) throw new StingException("Invalid shard type for monolithic shard: " + shardType); this.shardType = shardType; this.locs = locs; } + /** + * Closes the shard, tallying and incorporating read data. + */ + @Override + public void close() { + readsDataSource.incorporateReadMetrics(readMetrics); + } + /** * Returns null, indicating that (in this case) the entire genome is covered. * @return null. @@ -51,10 +67,29 @@ public class MonolithicShard implements Shard { * Reports the type of monolithic shard. * @return Type of monolithic shard. */ + @Override public ShardType getShardType() { return shardType; } + /** + * Gets key read validation and filtering properties. + * @return set of read properties associated with this shard. + */ + @Override + public ReadProperties getReadProperties() { + return readsDataSource.getReadsInfo(); + } + + /** + * Retrieves a storage space of metrics about number of reads included, filtered, etc. + * @return Storage space for metrics. + */ + @Override + public ReadMetrics getReadMetrics() { + return readMetrics; + } + /** * String representation of this shard. * @return "entire genome". 
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java index c8c23dc49..e34f3cf12 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java @@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import java.util.Iterator; import java.util.NoSuchElementException; @@ -25,8 +27,8 @@ public class MonolithicShardStrategy implements ShardStrategy { * Create a new shard strategy for shards of the given type. * @param shardType The shard type. */ - public MonolithicShardStrategy(final Shard.ShardType shardType, final List region) { - shard = new MonolithicShard(shardType,region); + public MonolithicShardStrategy(final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List region) { + shard = new MonolithicShard(readsDataSource,shardType,region); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java index a80254a21..9ebe52e3a 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java @@ -1,10 +1,12 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; 
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import java.util.*; @@ -35,10 +37,7 @@ import net.sf.picard.filter.SamRecordFilter; * @version 0.1 */ public class ReadShard implements BAMFormatAwareShard { - /** - * Information about the origins of reads. - */ - private final Reads sourceInfo; + private final SAMDataSource readsDataSource; /** * The data backing the next chunks to deliver to the traversal engine. @@ -50,17 +49,30 @@ public class ReadShard implements BAMFormatAwareShard { */ private final Collection reads = new ArrayList(ReadShardStrategy.MAX_READS); + /** + * Statistics about which reads in this shards were used and which were filtered away. + */ + private final ReadMetrics readMetrics = new ReadMetrics(); + /** * The filter to be applied to all reads meeting this criteria. */ private final SamRecordFilter filter; - public ReadShard(Reads sourceInfo, Map fileSpans, SamRecordFilter filter) { - this.sourceInfo = sourceInfo; + public ReadShard(SAMDataSource readsDataSource, Map fileSpans, SamRecordFilter filter) { + this.readsDataSource = readsDataSource; this.fileSpans = fileSpans; this.filter = filter; } + /** + * Closes the shard, tallying and incorporating read data. + */ + @Override + public void close() { + readsDataSource.incorporateReadMetrics(readMetrics); + } + /** * Get the list of chunks delimiting this shard. * @return a list of chunks that contain data for this shard. 
@@ -121,7 +133,7 @@ public class ReadShard implements BAMFormatAwareShard { */ @Override public StingSAMIterator iterator() { - return StingSAMIteratorAdapter.adapt(sourceInfo,reads.iterator()); + return StingSAMIteratorAdapter.adapt(reads.iterator()); } @Override @@ -137,7 +149,25 @@ public class ReadShard implements BAMFormatAwareShard { @Override public ShardType getShardType() { return ShardType.READ; - } + } + + /** + * Gets key read validation and filtering properties. + * @return set of read properties associated with this shard. + */ + @Override + public ReadProperties getReadProperties() { + return readsDataSource.getReadsInfo(); + } + + /** + * Retrieves a storage space of metrics about number of reads included, filtered, etc. + * @return Storage space for metrics. + */ + @Override + public ReadMetrics getReadMetrics() { + return readMetrics; + } /** * String representation of this shard. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java index 37e65435b..4fc928ccc 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java @@ -138,7 +138,7 @@ public class ReadShardStrategy implements ShardStrategy { if(selectedReaders.size() > 0) { filter = new ReadOverlapFilter(currentFilePointer.locations); - BAMFormatAwareShard shard = new ReadShard(dataSource.getReadsInfo(),selectedReaders,filter); + BAMFormatAwareShard shard = new ReadShard(dataSource,selectedReaders,filter); dataSource.fillShard(shard); if(!shard.isBufferEmpty()) { @@ -152,7 +152,7 @@ public class ReadShardStrategy implements ShardStrategy { } } else { - BAMFormatAwareShard shard = new ReadShard(dataSource.getReadsInfo(),position,filter); + BAMFormatAwareShard shard = new ReadShard(dataSource,position,filter); dataSource.fillShard(shard); nextShard = 
!shard.isBufferEmpty() ? shard : null; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java index b62b33ca4..4a7c9dda5 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.ReadProperties; import java.io.Serializable; import java.util.List; @@ -42,4 +44,21 @@ public interface Shard extends Serializable { * @return ShardType, indicating the type */ public ShardType getShardType(); + + /** + * Does any releasing / aggregation required when the shard is through being processed. + */ + public void close(); + + /** + * Gets required configuration for validating and filtering reads. + * @return read configuration properties. + */ + public ReadProperties getReadProperties(); + + /** + * Gets the runtime metrics associated with this shard. + * @return metrics and read counts. 
+ */ + public ReadMetrics getReadMetrics(); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index f3124492c..9212c6f2b 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -37,10 +37,10 @@ import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; -import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; import org.broadinstitute.sting.utils.StingException; import java.io.File; @@ -55,7 +55,12 @@ import java.util.*; */ public class SAMDataSource implements SimpleDataSource { /** Backing support for reads. */ - protected final Reads reads; + protected final ReadProperties readProperties; + + /** + * Runtime metrics of reads filtered, etc. + */ + protected final ReadMetrics readMetrics; /** * Identifiers for the readers driving this data source. @@ -91,11 +96,6 @@ public class SAMDataSource implements SimpleDataSource { /** our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(SAMDataSource.class); - /** - * A histogram of exactly what reads were removed from the input stream and why. 
- */ - private SAMReadViolationHistogram violations = new SAMReadViolationHistogram(); - /** * A collection of readers driving the merging process. */ @@ -105,13 +105,10 @@ public class SAMDataSource implements SimpleDataSource { * Create a new SAM data source given the supplied read metadata. * @param reads The read metadata. */ - public SAMDataSource(Reads reads) { - this.reads = reads; + public SAMDataSource(ReadProperties reads) { + this.readProperties = reads; + this.readMetrics = new ReadMetrics(); - // check the length - if (reads.getReadsFiles().size() < 1) { - throw new SimpleDataSourceLoadException("SAMDataSource: you must provide a list of length greater then 0"); - } for (File smFile : reads.getReadsFiles()) { if (!smFile.canRead()) { throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName()); @@ -166,7 +163,15 @@ public class SAMDataSource implements SimpleDataSource { * information about how they are downsampled, sorted, and filtered * @return */ - public Reads getReadsInfo() { return reads; } + public ReadProperties getReadsInfo() { return readProperties; } + + /** + * Checks to see whether any reads files are supplying data. + * @return True if no reads files are supplying data to the traversal; false otherwise. + */ + public boolean isEmpty() { + return readProperties.getReadsFiles().size() == 0; + } /** * Gets the SAM file associated with a given reader ID. @@ -263,11 +268,23 @@ public class SAMDataSource implements SimpleDataSource { } /** - * Returns a histogram of reads that were screened out, grouped by the nature of the error. - * @return Histogram of reads. Will not be null. + * Gets the cumulative read metrics for shards already processed. + * @return Cumulative read metrics. 
*/ - public SAMReadViolationHistogram getViolationHistogram() { - return violations; + public ReadMetrics getCumulativeReadMetrics() { + synchronized(readMetrics) { + return readMetrics.clone(); + } + } + + /** + * Incorporate the given read metrics into the cumulative read metrics. + * @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics. + */ + public void incorporateReadMetrics(final ReadMetrics readMetrics) { + synchronized(this.readMetrics) { + this.readMetrics.incrementMetrics(readMetrics); + } } /** @@ -361,18 +378,19 @@ public class SAMDataSource implements SimpleDataSource { if(shard.getFileSpans().get(id) == null) continue; CloseableIterator iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id)); - if(reads.getReadBufferSize() != null) - iterator = new BufferingReadIterator(iterator,reads.getReadBufferSize()); + if(readProperties.getReadBufferSize() != null) + iterator = new BufferingReadIterator(iterator,readProperties.getReadBufferSize()); if(shard.getFilter() != null) iterator = new FilteringIterator(iterator,shard.getFilter()); // not a counting iterator because we don't want to show the filtering of reads mergingIterator.addIterator(readers.getReader(id),iterator); } - return applyDecoratingIterators(enableVerification, - new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(reads,mergingIterator)), - reads.getDownsamplingMethod().toFraction, - reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), - reads.getSupplementalFilters()); + return applyDecoratingIterators(shard.getReadMetrics(), + enableVerification, + new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), + readProperties.getDownsamplingMethod().toFraction, + readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), + readProperties.getSupplementalFilters()); } /** @@ -389,11 +407,12 @@ public class 
SAMDataSource implements SimpleDataSource { for(SAMReaderID id: getReaderIDs()) mergingIterator.addIterator(readers.getReader(id),readers.getReader(id).iterator()); - return applyDecoratingIterators(shard instanceof ReadShard, - new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(reads,mergingIterator)), - reads.getDownsamplingMethod().toFraction, - reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), - reads.getSupplementalFilters()); + return applyDecoratingIterators(shard.getReadMetrics(), + shard instanceof ReadShard, + new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), + readProperties.getDownsamplingMethod().toFraction, + readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), + readProperties.getSupplementalFilters()); } /** @@ -411,6 +430,7 @@ public class SAMDataSource implements SimpleDataSource { /** * Filter reads based on user-specified criteria. * + * @param readMetrics metrics to track when using this iterator. * @param enableVerification Verify the order of reads. * @param wrappedIterator the raw data source. * @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus). @@ -418,12 +438,12 @@ public class SAMDataSource implements SimpleDataSource { * @param supplementalFilters additional filters to apply to the reads. * @return An iterator wrapped with filters reflecting the passed-in parameters. Will not be null. 
*/ - protected StingSAMIterator applyDecoratingIterators(boolean enableVerification, + protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics, + boolean enableVerification, StingSAMIterator wrappedIterator, Double downsamplingFraction, Boolean noValidationOfReadOrder, Collection supplementalFilters) { - wrappedIterator = new MalformedSAMFilteringIterator(getHeader(),wrappedIterator,violations ); wrappedIterator = new ReadFormattingIterator(wrappedIterator); // NOTE: this (and other filtering) should be done before on-the-fly sorting @@ -436,9 +456,7 @@ public class SAMDataSource implements SimpleDataSource { if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(wrappedIterator); - for( SamRecordFilter supplementalFilter: supplementalFilters ) - wrappedIterator = StingSAMIteratorAdapter.adapt(wrappedIterator.getSourceInfo(), - new CountingFilteringIterator(wrappedIterator,supplementalFilter)); + wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); return wrappedIterator; } @@ -511,7 +529,7 @@ public class SAMDataSource implements SimpleDataSource { private synchronized void createNewResource() { if(allResources.size() > maxEntries) throw new StingException("Cannot create a new resource pool. All resources are in use."); - SAMReaders readers = new SAMReaders(reads); + SAMReaders readers = new SAMReaders(readProperties); allResources.add(readers); availableResources.add(readers); } @@ -531,7 +549,7 @@ public class SAMDataSource implements SimpleDataSource { * Derive a new set of readers from the Reads metadata. * @param sourceInfo Metadata for the reads to load. 
*/ - public SAMReaders(Reads sourceInfo) { + public SAMReaders(ReadProperties sourceInfo) { for(File readsFile: sourceInfo.getReadsFiles()) { SAMFileReader reader = new SAMFileReader(readsFile); reader.enableFileSource(true); @@ -616,10 +634,6 @@ public class SAMDataSource implements SimpleDataSource { */ private final StingSAMIterator wrappedIterator; - public Reads getSourceInfo() { - return wrappedIterator.getSourceInfo(); - } - public ReleasingIterator(SAMReaders resource, StingSAMIterator wrapped) { this.resource = resource; this.wrappedIterator = wrapped; diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index f3faf9a52..4a8a2d366 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; @@ -192,7 +193,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar outputTracker.bypassThreadLocalStorage(true); try { walker.onTraversalDone(result); - printOnTraversalDone(result); + printOnTraversalDone(result,engine.getCumulativeMetrics()); } finally { outputTracker.bypassThreadLocalStorage(false); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 9acebe6a4..23e4b575e 100644 --- 
a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -12,12 +12,9 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import java.util.Collection; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; import net.sf.picard.reference.IndexedFastaSequenceFile; @@ -55,7 +52,7 @@ public class LinearMicroScheduler extends MicroScheduler { // New experimental code for managing locus intervals. if(shard.getShardType() == Shard.ShardType.LOCUS) { LocusWalker lWalker = (LocusWalker)walker; - WindowMaker windowMaker = new WindowMaker(getReadIterator(shard), shard.getGenomeLocs(), walker.getMandatoryReadFilters(), lWalker.getDiscards()); + WindowMaker windowMaker = new WindowMaker(shard, getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards()); for(WindowMaker.WindowMakerIterator iterator: windowMaker) { ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); @@ -74,7 +71,7 @@ public class LinearMicroScheduler extends MicroScheduler { Object result = accumulator.finishTraversal(); - printOnTraversalDone(result); + printOnTraversalDone(result,engine.getCumulativeMetrics()); outputTracker.close(); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index ff6712481..e59f083af 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ 
b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -35,9 +35,9 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.utils.StingException; import java.util.*; @@ -147,23 +147,15 @@ public abstract class MicroScheduler { * @return an iterator over the reads specified in the shard. */ protected StingSAMIterator getReadIterator(Shard shard) { - return (reads != null) ? reads.seek(shard) : new NullSAMIterator(new Reads(new ArrayList())); + return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(new ArrayList())); } /** * Print summary information for the analysis. * @param sum The final reduce output. */ - protected void printOnTraversalDone(Object sum) { - // HACK: The microscheduler should be too dumb to know anything about the data - // it's actually processing; it should just funnel anything it receives - // to the traversal engine. - // TODO: Implement code to allow the datasources to print summary info of the - // data they've seen. 
- if( reads != null && reads.getViolationHistogram().getViolationCount() > 0 ) - logger.warn(String.format("%n%s",reads.getViolationHistogram())); - - traversalEngine.printOnTraversalDone(sum); + protected void printOnTraversalDone(Object sum, ReadMetrics metrics) { + traversalEngine.printOnTraversalDone(metrics); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index cf110d1e2..ccd54edab 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.StingException; import java.util.concurrent.Callable; @@ -48,7 +47,7 @@ public class ShardTraverser implements Callable { TraversalEngine traversalEngine, Walker walker, Shard shard, - ThreadLocalOutputTracker outputTracker ) { + ThreadLocalOutputTracker outputTracker) { this.microScheduler = microScheduler; this.walker = walker; this.traversalEngine = traversalEngine; @@ -62,7 +61,7 @@ public class ShardTraverser implements Callable { Object accumulator = walker.reduceInit(); LocusWalker lWalker = (LocusWalker)walker; - WindowMaker windowMaker = new WindowMaker(microScheduler.getReadIterator(shard),shard.getGenomeLocs(),walker.getMandatoryReadFilters(),lWalker.getDiscards()); + WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards()); ShardDataProvider dataProvider = null; for(WindowMaker.WindowMakerIterator iterator: windowMaker) { diff --git 
a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index ce2b39fb4..eed24343a 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -1,21 +1,14 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.DownsampleType; -import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; -import net.sf.samtools.SAMRecord; import net.sf.picard.util.PeekableIterator; -import net.sf.picard.filter.FilteringIterator; -import net.sf.picard.filter.SamRecordFilter; /** * Buffer shards of data which may or may not contain multiple loci into @@ -29,7 +22,7 @@ public class WindowMaker implements Iterable, I /** * Source information for iteration. */ - private final Reads sourceInfo; + private final ReadProperties sourceInfo; /** * Hold the read iterator so that it can be closed later. @@ -61,14 +54,13 @@ public class WindowMaker implements Iterable, I * the given intervals. * @param iterator The data source for this window. * @param intervals The set of intervals over which to traverse. + * @param discards a filter at that indicates read position relative to some locus? 
*/ - public WindowMaker(StingSAMIterator iterator, List intervals, List filters, List discards ) { - this.sourceInfo = iterator.getSourceInfo(); + public WindowMaker(Shard shard, StingSAMIterator iterator, List intervals, List discards ) { + this.sourceInfo = shard.getReadProperties(); this.readIterator = iterator; - LocusIterator locusIterator; - Iterator wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters); - locusIterator = new LocusIteratorByState(wrappedIterator,sourceInfo,discards); + LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,discards); this.locusOverflowTracker = locusIterator.getLocusOverflowTracker(); @@ -108,7 +100,7 @@ public class WindowMaker implements Iterable, I seedNextLocus(); } - public Reads getSourceInfo() { + public ReadProperties getSourceInfo() { return sourceInfo; } diff --git a/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index f69b0749f..aa5a75396 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -27,45 +27,38 @@ import net.sf.samtools.util.CloserUtil; import java.util.Iterator; import java.util.NoSuchElementException; +import java.util.Collection; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import net.sf.picard.filter.SamRecordFilter; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; +import org.broadinstitute.sting.gatk.ReadMetrics; /** * Filtering Iterator which takes a filter and an iterator and iterates * through only those records which are not rejected by the filter. 
- * - * Just a copy of a unmodifiable FilteringIterator from Picard - * * @author Mark DePristo */ public class CountingFilteringIterator implements CloseableIterator { + private final ReadMetrics runtimeMetrics; private final Iterator iterator; - private final SamRecordFilter filter; + private final Collection filters; private SAMRecord next = null; /** * Constructor * + * @param metrics metrics to accumulate on the nature of filtered reads. * @param iterator the backing iterator - * @param filter the filter (which may be a FilterAggregator) + * @param filters the filter (which may be a FilterAggregator) */ - public CountingFilteringIterator(Iterator iterator, SamRecordFilter filter) { + public CountingFilteringIterator(ReadMetrics metrics, Iterator iterator, Collection filters) { + this.runtimeMetrics = metrics; this.iterator = iterator; - this.filter = filter; + this.filters = filters; next = getNextRecord(); } - /** - * Special case to count passing records - * @param iterator - */ - public CountingFilteringIterator(Iterator iterator) { - this(iterator, null); - } - /** * Returns true if the iteration has more elements. 
* @@ -111,15 +104,18 @@ public class CountingFilteringIterator implements CloseableIterator { private SAMRecord getNextRecord() { while (iterator.hasNext()) { SAMRecord record = iterator.next(); + runtimeMetrics.incrementNumReadsSeen(); - if ( filter == null ) { - TraversalStatistics.nReads++; - return record; - } else if (!filter.filterOut(record)) { - return record; - } else { - TraversalStatistics.incrementFilter(filter); + boolean filtered = false; + for(SamRecordFilter filter: filters) { + if(filter.filterOut(record)) { + runtimeMetrics.incrementFilter(filter); + filtered = true; + break; + } } + + if(!filtered) return record; } return null; diff --git a/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java index 17a0c698f..872cba05c 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; /** * Created by IntelliJ IDEA. 
diff --git a/java/src/org/broadinstitute/sting/utils/sam/SAMReadValidator.java b/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java similarity index 58% rename from java/src/org/broadinstitute/sting/utils/sam/SAMReadValidator.java rename to java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index 10b091fb8..b48a5cd87 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/SAMReadValidator.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -1,5 +1,6 @@ /* - * Copyright (c) 2009 The Broad Institute + * Copyright (c) 2010, The Broad Institute + * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -11,7 +12,6 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -22,77 +22,84 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.sam; +package org.broadinstitute.sting.gatk.filters; +import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; /** - * Validates reads against a specific set of criteria. If it finds a - * read that fails to meet the given criteria, it will throw an exception. - * The caller can decide whether to ignore the error, hide the read - * from the user, or blow up in a spectacular ball of fire. + * Filter out malformed reads. 
* - * @author hanna + * @author mhanna * @version 0.1 */ -public class SAMReadValidator { - /** - * Validate the sam read against a list of criteria that are known to cause failures in the GATK. - * Throw an exception if the read fails. - * @param read the read to validate. Must not be null. - */ - public static void validate( SAMFileHeader header, SAMRecord read ) throws SAMReadValidationException { - checkInvalidAlignmentStart(read); - checkInvalidAlignmentEnd(read); - checkAlignmentDisagreesWithHeader(header,read); - checkCigarDisagreesWithAlignment(read); +public class MalformedReadFilter implements SamRecordFilter { + public boolean filterOut(SAMRecord read) { + return !checkInvalidAlignmentStart(read) || + !checkInvalidAlignmentEnd(read) || + !checkAlignmentDisagreesWithHeader(GenomeAnalysisEngine.instance.getSAMFileHeader(),read) || + !checkCigarDisagreesWithAlignment(read); } /** * Check for the case in which the alignment start is inconsistent with the read unmapped flag. * @param read The read to validate. + * @return true if read start is valid, false otherwise. */ - private static void checkInvalidAlignmentStart( SAMRecord read ) { + private static boolean checkInvalidAlignmentStart( SAMRecord read ) { + // read is not flagged as 'unmapped', but alignment start is NO_ALIGNMENT_START if( !read.getReadUnmappedFlag() && read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ) - throw new SAMReadValidationException("read is not flagged as 'unmapped', but alignment start is NO_ALIGNMENT_START"); + return false; + // Read is not flagged as 'unmapped', but alignment start is -1 if( !read.getReadUnmappedFlag() && read.getAlignmentStart() == -1 ) - throw new SAMReadValidationException("Read is not flagged as 'unmapped', but alignment start is -1"); + return false; + return true; } /** * Check for invalid end of alignments. * @param read The read to validate. + * @return true if read end is valid, false otherwise. 
*/ - private static void checkInvalidAlignmentEnd( SAMRecord read ) { + private static boolean checkInvalidAlignmentEnd( SAMRecord read ) { + // Alignment ends prior to its beginning if( !read.getReadUnmappedFlag() && read.getAlignmentEnd() != -1 && read.getAlignmentEnd() < read.getAlignmentStart() ) - throw new SAMReadValidationException("Alignment ends prior to its beginning"); + return false; + return true; } /** * Check to ensure that the alignment makes sense based on the contents of the header. * @param header The SAM file header. * @param read The read to verify. + * @return true if alignment agrees with header, false othrewise. */ - private static void checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) { + private static boolean checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) { + // Read is aligned to nonexistent contig if( read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START ) - throw new SAMReadValidationException("Read is aligned to nonexistent contig"); + return false; SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() ); + // Read is aligned to a point after the end of the contig if( !read.getReadUnmappedFlag() && read.getAlignmentStart() > contigHeader.getSequenceLength() ) - throw new SAMReadValidationException("Read is aligned to a point after the end of the contig"); + return false; + return true; } /** - * Check for inconsistencies between the cigar string and the + * Check for inconsistencies between the cigar string and the * @param read The read to validate. + * @return true if cigar agrees with alignment, false otherwise. 
*/ - private static void checkCigarDisagreesWithAlignment( SAMRecord read ) { + private static boolean checkCigarDisagreesWithAlignment(SAMRecord read) { + // Read has a valid alignment start, but the CIGAR string is empty if( !read.getReadUnmappedFlag() && read.getAlignmentStart() != -1 && read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START && read.getAlignmentBlocks().size() == 0 ) - throw new SAMReadValidationException("Read has a valid alignment start, but the CIGAR string is empty"); + return false; + return true; } } - diff --git a/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java index 8837f4e28..cbbd4a315 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; /** * Created by IntelliJ IDEA. diff --git a/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java index 80cbb7024..fe6dfbb9e 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; /** * Created by IntelliJ IDEA. 
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java index 021f57f84..b5643f834 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java @@ -6,7 +6,7 @@ import net.sf.picard.sam.MergingSamRecordIterator; import java.util.Iterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; /* * Copyright (c) 2009 The Broad Institute @@ -76,15 +76,6 @@ public class BoundedReadIterator implements StingSAMIterator { this.doNotUseThatUnmappedReadPile = useThem; } - /** - * Retrieves information about reads sources. - * @return Info about the sources of reads. - */ - public Reads getSourceInfo() { - return iterator.getSourceInfo(); - } - - public SAMFileHeader getHeader() { // todo: this is bad, we need an iterface out there for samrecords that supports getting the header, // regardless of the merging diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java index 78fa540c2..b2de2b386 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java @@ -5,7 +5,7 @@ import net.sf.samtools.SAMRecord; import java.util.Iterator; import java.util.Random; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; public class DownsampleIterator implements StingSAMIterator { @@ -22,15 +22,6 @@ public class DownsampleIterator implements StingSAMIterator { next = getNextRecord(); } - /** - * Retrieves information about reads sources. - * @return Info about the sources of reads. 
- */ - public Reads getSourceInfo() { - return it.getSourceInfo(); - } - - public boolean hasNext() { return next != null; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index ad53372f3..90a694149 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.*; import net.sf.picard.util.PeekableIterator; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsampleType; @@ -264,7 +264,7 @@ public class LocusIteratorByState extends LocusIterator { //final boolean DEBUG = false; //final boolean DEBUG2 = false && DEBUG; - private Reads readInfo; + private ReadProperties readInfo; private AlignmentContext nextAlignmentContext; private List filters = new ArrayList(); @@ -273,11 +273,11 @@ public class LocusIteratorByState extends LocusIterator { // constructors and other basic operations // // ----------------------------------------------------------------------------------------------------------------- - public LocusIteratorByState(final Iterator samIterator, Reads readInformation ) { + public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation ) { this(samIterator, readInformation, NO_FILTERS); } - public LocusIteratorByState(final Iterator samIterator, Reads readInformation, List filters ) { + public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, List filters ) { // Aggregate all sample names. 
// TODO: Push in header via constructor if(GenomeAnalysisEngine.instance != null && GenomeAnalysisEngine.instance.getDataSource() != null) { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MalformedSAMFilteringIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/MalformedSAMFilteringIterator.java deleted file mode 100644 index d5c3a0ac7..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MalformedSAMFilteringIterator.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.sam.SAMReadValidator; -import org.broadinstitute.sting.utils.sam.SAMReadValidationException; -import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; - -import java.util.NoSuchElementException; - -/** - * A decorating iterator that examines the stream of reads, discarding those - * that fail to meet a minimum standard for consumption by the GATK. - * - * @author hanna - * @version 0.1 - */ - -public class MalformedSAMFilteringIterator implements StingSAMIterator { - /** - * The header to validate reads against. - */ - private SAMFileHeader header = null; - - /** - * The wrapped iterator. Get reads from here. - */ - private StingSAMIterator wrapped = null; - - /** - * Collector for SAM read violations. - */ - private SAMReadViolationHistogram violations = null; - - /** - * The next SAMRecord to return.; - */ - private SAMRecord next = null; - - /** - * Creates a new MalformedSAMFilteringIterator, and provides a collector for the count - * @param wrapped The wrapped iterator to use as backing data. - * @param violations A structure to hold a breakdown of validator violations. - */ - public MalformedSAMFilteringIterator( SAMFileHeader header, StingSAMIterator wrapped, SAMReadViolationHistogram violations ) { - this.header = header; - this.wrapped = wrapped; - this.violations = violations; - seedNext(); - } - - /** - * Returns source information about the reads. - * @return - */ - public Reads getSourceInfo() { - return wrapped.getSourceInfo(); - } - - /** - * Gets an iterator, helpful for foreach loops. - * @return An iterator sharing the same state variables as the current iterator. 
- */ - public StingSAMIterator iterator() { - return this; - } - - /** - * Checks to see whether there's a - * @return True if a next is available, false otherwise. - */ - public boolean hasNext() { - return next != null; - } - - /** - * Gets the next valid record from the stream. - * @return Next valid record. - */ - public SAMRecord next() { - SAMRecord current = next; - if( current == null ) - throw new NoSuchElementException("MalformedSAMFilteringIterator: supply of reads is exhausted."); - seedNext(); - return current; - } - - /** - * Closes the wrapped iterator. - */ - public void close() { - wrapped.close(); - } - - /** - * Looks ahead for the next valid SAMRecord. - */ - protected void seedNext() { - next = null; - while( wrapped.hasNext() && next == null ) { - SAMRecord toTest = wrapped.next(); - try { - SAMReadValidator.validate(header,toTest); - next = toTest; - } - catch ( SAMReadValidationException ex ) { - violations.addViolation(ex); - } - } - } - - /** - * Throws an exception. Remove is not supported. - */ - public void remove() { throw new UnsupportedOperationException("Unable to remove from a StingSAMIterator"); } -} diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java index 6dc67132b..ac91af18f 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import java.util.Iterator; @@ -22,11 +22,10 @@ import java.util.NoSuchElementException; * A placeholder for an iterator with no data. 
*/ public class NullSAMIterator implements StingSAMIterator { - private Reads reads = null; + private ReadProperties reads = null; - public NullSAMIterator( Reads reads ) { this.reads = reads; } + public NullSAMIterator( ReadProperties reads ) { this.reads = reads; } - public Reads getSourceInfo() { return reads; } public Iterator iterator() { return this; } public void close() { /* NO-OP */ } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java index 3c92d31ee..c2d3976ea 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; @@ -35,11 +35,6 @@ import net.sf.samtools.util.CloseableIterator; * @version 0.1 */ public class PositionTrackingIterator implements StingSAMIterator { - /** - * Source information about the reads. - */ - private Reads sourceInfo; - /** * The iterator being tracked. */ @@ -50,13 +45,6 @@ public class PositionTrackingIterator implements StingSAMIterator { */ private long position; - /** - * {@inheritDoc} - */ - public Reads getSourceInfo() { - return sourceInfo; - } - /** * Retrieves the current position of the iterator. The 'current position' of the iterator is defined as * the coordinate of the read that will be returned if next() is called. @@ -69,12 +57,10 @@ public class PositionTrackingIterator implements StingSAMIterator { /** * Create a new iterator wrapping the given position, assuming that the reader is position reads * into the sequence. - * @param sourceInfo Information about where these reads came from. * @param iterator Iterator to wraps. 
* @param position Non-negative position where the iterator currently sits. */ - public PositionTrackingIterator( Reads sourceInfo, CloseableIterator iterator, long position ) { - this.sourceInfo = sourceInfo; + public PositionTrackingIterator(CloseableIterator iterator, long position ) { this.iterator = iterator; this.position = position; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 3a08e0677..8675b0501 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMTag; import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.apache.log4j.Logger; @@ -35,14 +35,6 @@ public class ReadFormattingIterator implements StingSAMIterator { this.wrappedIterator = wrappedIterator; } - /** - * Get metadata about the reads' sources, etc. - * @return Source info about the reads. - */ - public Reads getSourceInfo() { - return wrappedIterator.getSourceInfo(); - } - /** * Convenience function for use in foreach loops. Dangerous because it does not actually * reset the iterator. 
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java index 499f41d76..d294993d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; /** * * User: aaron @@ -29,10 +29,4 @@ import org.broadinstitute.sting.gatk.Reads; * This is the standard interface for all iterators in the Sting package that iterate over SAMRecords */ public interface StingSAMIterator extends CloseableIterator, Iterable { - /** - * Gets source information for the reads. Contains information about the original reads - * files, plus information about downsampling, etc. - * @return - */ - public Reads getSourceInfo(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java index 4ecbd64dd..3bec57cd6 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java @@ -5,7 +5,7 @@ import net.sf.samtools.util.CloseableIterator; import java.util.Iterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.utils.StingException; /** @@ -36,12 +36,12 @@ import org.broadinstitute.sting.utils.StingException; */ public class StingSAMIteratorAdapter { - public static StingSAMIterator adapt(Reads sourceInfo, Iterator iter) { - return new PrivateStringSAMIterator(sourceInfo, iter); + public static StingSAMIterator adapt(Iterator iter) { + return new 
PrivateStringSAMIterator(iter); } - public static StingSAMIterator adapt(Reads sourceInfo, CloseableIterator iter) { - return new PrivateStringSAMCloseableIterator(sourceInfo, iter); + public static StingSAMIterator adapt(CloseableIterator iter) { + return new PrivateStringSAMCloseableIterator(iter); } } @@ -52,20 +52,12 @@ public class StingSAMIteratorAdapter { * methods that implement the iterable<> interface and the close() method from CloseableIterator */ class PrivateStringSAMIterator implements StingSAMIterator { - private Reads sourceInfo = null; private Iterator iter = null; - PrivateStringSAMIterator(Reads sourceInfo, Iterator iter) { - this.sourceInfo = sourceInfo; + PrivateStringSAMIterator(Iterator iter) { this.iter = iter; } - public Reads getSourceInfo() { - if( sourceInfo == null ) - throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework."); - return sourceInfo; - } - public void close() { // do nothing, we can't close the iterator anyway. } @@ -93,20 +85,12 @@ class PrivateStringSAMIterator implements StingSAMIterator { * methods that implement the iterable<> interface. */ class PrivateStringSAMCloseableIterator implements StingSAMIterator { - private Reads sourceInfo = null; private CloseableIterator iter = null; - PrivateStringSAMCloseableIterator(Reads sourceInfo, CloseableIterator iter) { - this.sourceInfo = sourceInfo; + PrivateStringSAMCloseableIterator(CloseableIterator iter) { this.iter = iter; } - public Reads getSourceInfo() { - if( sourceInfo == null ) - throw new StingException("Unable to provide source info for the reads. 
Please upgrade to the new data sharding framework."); - return sourceInfo; - } - public void close() { iter.close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java index 42a12b910..804055140 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java @@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.util.RuntimeIOException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import java.util.Iterator; @@ -24,15 +24,6 @@ public class VerifyingSamIterator implements StingSAMIterator { this.it = it; } - /** - * Retrieves information about reads sources. - * @return Info about the sources of reads. 
- */ - public Reads getSourceInfo() { - return it.getSourceInfo(); - } - - public boolean hasNext() { return this.it.hasNext(); } public SAMRecord next() { diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 49c7b883d..fc4c03a44 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -2,18 +2,15 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; +import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.MathUtils; import java.util.Map; -import java.util.List; -import java.util.Iterator; - -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; public abstract class TraversalEngine,ProviderType extends ShardDataProvider> { // Time in milliseconds since we initialized this engine @@ -27,6 +24,12 @@ public abstract class TraversalEngine,Provide /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); + /** + * Gets the named traversal type associated with the given traversal. + * @return A user-friendly name for the given traversal type. 
+ */ + protected abstract String getTraversalType(); + /** * @param curTime (current runtime, in millisecs) * @@ -39,23 +42,27 @@ public abstract class TraversalEngine,Provide /** * Forward request to printProgress * - * @param type the TRAVERSAL_TYPE of the traversal + * @param shard the given shard currently being processed. * @param loc the location */ - public void printProgress(final String type, GenomeLoc loc) { - printProgress(false, type, loc); + public void printProgress(Shard shard,GenomeLoc loc) { + // A bypass is inserted here for unit testing. + // TODO: print metrics outside of the traversal engine to more easily handle cumulative stats. + ReadMetrics cumulativeMetrics = GenomeAnalysisEngine.instance != null ? GenomeAnalysisEngine.instance.getCumulativeMetrics().clone() : new ReadMetrics(); + cumulativeMetrics.incrementMetrics(shard.getReadMetrics()); + printProgress(loc, cumulativeMetrics, false); } /** * Utility routine that prints out process information (including timing) every N records or * every M seconds, for N and M set in global variables. * - * @param mustPrint If true, will print out info, regardless of nRecords or time interval - * @param type String to print out describing our atomic traversal type ("read", "locus", etc) * @param loc Current location + * @param metrics Metrics of reads filtered in/out. 
+ * @param mustPrint If true, will print out info, regardless of nRecords or time interval */ - private void printProgress(boolean mustPrint, final String type, GenomeLoc loc) { - final long nRecords = TraversalStatistics.nRecords; + private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint) { + final long nRecords = metrics.getNumIterations(); final long curTime = System.currentTimeMillis(); final double elapsed = (curTime - startTime) / 1000.0; //System.out.printf("Cur = %d, last print = %d, elapsed=%.2f, nRecords=%d, met=%b%n", curTime, lastProgressPrintTime, elapsed, nRecords, maxElapsedIntervalForPrinting(curTime)); @@ -64,44 +71,35 @@ public abstract class TraversalEngine,Provide this.lastProgressPrintTime = curTime; final double secsPer1MReads = (elapsed * 1000000.0) / nRecords; if (loc != null) - logger.info(String.format("[PROGRESS] Traversed to %s, processing %,d %s in %.2f secs (%.2f secs per 1M %s)", loc, nRecords, type, elapsed, secsPer1MReads, type)); + logger.info(String.format("[PROGRESS] Traversed to %s, processing %,d %s in %.2f secs (%.2f secs per 1M %s)", loc, nRecords, getTraversalType(), elapsed, secsPer1MReads, getTraversalType())); else - logger.info(String.format("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)", nRecords, type, elapsed, secsPer1MReads, type)); + logger.info(String.format("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)", nRecords, getTraversalType(), elapsed, secsPer1MReads, getTraversalType())); } } - /** - * A passthrough method so that subclasses can report which types of traversals they're using. - * - * @param sum Result of the computation. 
- */ - public abstract void printOnTraversalDone(T sum); - /** * Called after a traversal to print out information about the traversal process - * - * @param type describing this type of traversal - * @param sum The reduce result of the traversal */ - protected void printOnTraversalDone(final String type, T sum) { - printProgress(true, type, null); + public void printOnTraversalDone(ReadMetrics cumulativeMetrics) { + printProgress(null, cumulativeMetrics, true); + final long curTime = System.currentTimeMillis(); final double elapsed = (curTime - startTime) / 1000.0; // count up the number of skipped reads by summing over all filters long nSkippedReads = 0L; - for ( long counts : TraversalStatistics.counter.values() ) - nSkippedReads += counts; + for ( Map.Entry countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet()) + nSkippedReads += countsByFilter.getValue(); logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours%n", elapsed, elapsed / 60, elapsed / 3600)); logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)", nSkippedReads, - TraversalStatistics.nReads, - 100.0 * MathUtils.ratio(nSkippedReads, TraversalStatistics.nReads))); - for ( Map.Entry filterCounts : TraversalStatistics.counter.entrySet() ) { + cumulativeMetrics.getNumReadsSeen(), + 100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen()))); + for ( Map.Entry filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) { long count = filterCounts.getValue(); logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s", - count, 100.0 * MathUtils.ratio(count, TraversalStatistics.nReads), Utils.getClassName(filterCounts.getKey()))); + count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), Utils.getClassName(filterCounts.getKey()))); } } @@ -122,15 +120,4 @@ public abstract class TraversalEngine,Provide public abstract T traverse(WalkerType walker, ProviderType dataProvider, T sum); - - 
public static Iterator addMandatoryFilteringIterators(Iterator iter, List filters ) { - for( SamRecordFilter filter : filters ) { - //logger.debug("Adding filter " + filter.getClass()); - iter = new CountingFilteringIterator(iter,filter); - } - - return new CountingFilteringIterator(iter); // special case to count all reads - } - - } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalStatistics.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalStatistics.java deleted file mode 100755 index c49bd2f19..000000000 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalStatistics.java +++ /dev/null @@ -1,55 +0,0 @@ -package org.broadinstitute.sting.gatk.traversals; - -import net.sf.picard.filter.SamRecordFilter; - -import java.util.Map; -import java.util.HashMap; - -import org.broadinstitute.sting.utils.Utils; - -/** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Apr 8, 2009 - * Time: 4:13:40 PM - * - * Holds a bunch of basic information about the traversal. - * TODO: Make this a class that can be passed around from the TraversalEngine to other entries that want to update it. 
- */ -public class TraversalStatistics { - // Number of records (loci, reads) we've processed - public static long nRecords; - // How many reads have we processed, along with those skipped for various reasons - public static long nReads; - public static long nSkippedReads; - public static long nUnmappedReads; - public static long nNotPrimary; - public static long nBadAlignments; - public static long nSkippedIndels; - public static long nDuplicates; - public static Map counter = new HashMap(); - - static { - reset(); - } - - public static void incrementFilter(SamRecordFilter filter) { - long c = 0; - if ( counter.containsKey(filter.getClass()) ) { - c = counter.get(filter.getClass()); - } - - counter.put(filter.getClass(), c + 1L); - } - - public static void reset() { - nRecords = 0; - nReads = 0; - nSkippedReads = 0; - nUnmappedReads = 0; - nNotPrimary = 0; - nBadAlignments = 0; - nSkippedIndels = 0; - nDuplicates = 0; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index e535e1707..0a3c9b9c0 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.traversals; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; @@ -52,12 +50,14 @@ public class TraverseDuplicates extends TraversalEngine readsAtLoc(final SAMRecord read, PushbackIterator iter) { GenomeLoc site = GenomeLocParser.createGenomeLoc(read); ArrayList l = new ArrayList(); @@ -165,8 +165,7 @@ public class TraverseDuplicates extends TraversalEngine walker, ReadShardDataProvider dataProvider, 
T sum) { - Iterator filterIter = addMandatoryFilteringIterators(new ReadView(dataProvider).iterator(), walker.getMandatoryReadFilters()); - PushbackIterator iter = new PushbackIterator(filterIter); + PushbackIterator iter = new PushbackIterator(new ReadView(dataProvider).iterator()); /** * while we still have more reads: @@ -186,7 +185,7 @@ public class TraverseDuplicates extends TraversalEngine extends TraversalEngine extends TraversalEngine,LocusShardDataProvider> { - final private static String LOCI_STRING = "sites"; - /** * our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); + @Override + protected String getTraversalType() { + return "sites"; + } + @Override public T traverse( LocusWalker walker, LocusShardDataProvider dataProvider, - T sum ) { + T sum) { logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider)); LocusView locusView = getLocusView( walker, dataProvider ); @@ -48,7 +51,7 @@ public class TraverseLoci extends TraversalEngine,Locu AlignmentContext locus = locusView.next(); GenomeLoc location = locus.getLocation(); - TraversalStatistics.nRecords++; + dataProvider.getShard().getReadMetrics().incrementNumIterations(); if ( locus.hasExtendedEventPileup() ) { // if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions @@ -76,7 +79,7 @@ public class TraverseLoci extends TraversalEngine,Locu sum = walker.reduce(x, sum); } - printProgress(LOCI_STRING, locus.getLocation()); + printProgress(dataProvider.getShard(),locus.getLocation()); } } @@ -96,15 +99,6 @@ public class TraverseLoci extends TraversalEngine,Locu return sum; } - /** - * Temporary override of printOnTraversalDone. - * - * @param sum Result of the computation. - */ - public void printOnTraversalDone( T sum ) { - printOnTraversalDone(LOCI_STRING, sum ); - } - /** * Gets the best view of loci for this walker given the available data. 
* @param walker walker to interrogate. diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index ae40e80fd..7362b83a4 100644 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; -import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.apache.log4j.Logger; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecordCoordinateComparator; @@ -24,8 +24,10 @@ public class TraverseReadPairs extends TraversalEngine extends TraversalEngine pairs = new ArrayList(); for(SAMRecord read: reads) { - TraversalStatistics.nReads++; + dataProvider.getShard().getReadMetrics().incrementNumReadsSeen(); if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) { // If this read name is the same as the last, accumulate it. @@ -55,17 +57,17 @@ public class TraverseReadPairs extends TraversalEngine 0) - sum = walkOverPairs(walker,pairs,sum); + sum = walkOverPairs(walker,dataProvider.getShard(),pairs,sum); return sum; } @@ -73,13 +75,14 @@ public class TraverseReadPairs extends TraversalEngine walker, List reads, T sum) { + private T walkOverPairs(ReadPairWalker walker, Shard shard, List reads, T sum) { // update the number of reads we've seen - TraversalStatistics.nRecords++; + shard.getReadMetrics().incrementNumIterations(); // Sort the reads present in coordinate order. 
Collections.sort(reads,new SAMRecordCoordinateComparator()); @@ -92,13 +95,4 @@ public class TraverseReadPairs extends TraversalEngine extends TraversalEngine,Read /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraverseReads.class); - /** descriptor of the type */ - private static final String READS_STRING = "reads"; + @Override + protected String getTraversalType() { + return "reads"; + } /** * Traverse by reads, given the data and the walker @@ -87,8 +90,9 @@ public class TraverseReads extends TraversalEngine,Read refContext = reference.getReferenceContext(read); // update the number of reads we've seen - TraversalStatistics.nRecords++; - TraversalStatistics.nReads++; + ReadMetrics readMetrics = dataProvider.getShard().getReadMetrics(); + readMetrics.incrementNumIterations(); + readMetrics.incrementNumReadsSeen(); // if the read is mapped, create a metadata tracker ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null; @@ -99,20 +103,9 @@ public class TraverseReads extends TraversalEngine,Read sum = walker.reduce(x, sum); } - printProgress(READS_STRING, - (read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) ? - null : - GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart())); + GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()); + printProgress(dataProvider.getShard(),locus); } return sum; } - - /** - * Temporary override of printOnTraversalDone. - * TODO: Add some sort of TE.getName() function once all TraversalEngines are ported. - * @param sum Result of the computation. 
- */ - public void printOnTraversalDone( T sum ) { - printOnTraversalDone(READS_STRING, sum ); - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java index d9a7a7121..1fc606f07 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java @@ -1,15 +1,12 @@ package org.broadinstitute.sting.gatk.walkers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; -import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; import java.util.Set; -import java.util.ArrayList; import java.util.Arrays; import net.sf.samtools.SAMRecord; @@ -23,6 +20,7 @@ import net.sf.picard.filter.SamRecordFilter; * To change this template use File | Settings | File Templates. */ @Requires({DataSource.READS,DataSource.REFERENCE}) +@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class}) public abstract class DuplicateWalker extends Walker { // Do we actually want to operate on the context? 
public boolean filter(GenomeLoc loc, AlignmentContext context, Set> readSets ) { @@ -34,20 +32,4 @@ public abstract class DuplicateWalker extends Walker getMandatoryReadFilters() { - SamRecordFilter filter1 = new UnmappedReadFilter(); - SamRecordFilter filter2 = new NotPrimaryAlignmentReadFilter(); - List x = super.getMandatoryReadFilters(); - - x.addAll(Arrays.asList(filter2, filter1)); - return x; - - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 274fecd87..516ece4a1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -4,15 +4,11 @@ import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; -import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.LocusIteratorFilter; import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import java.util.List; import java.util.Arrays; -import java.util.EnumSet; /** * Created by IntelliJ IDEA. @@ -23,6 +19,7 @@ import java.util.EnumSet; */ @By(DataSource.READS) @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) +@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class}) public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? 
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -32,28 +29,6 @@ public abstract class LocusWalker extends Walker getMandatoryReadFilters() { -// if ( false ) { -// SamRecordFilter filter = new LocusStreamFilterFunc(); -// return Arrays.asList(filter); -// } else { - SamRecordFilter filter1 = new UnmappedReadFilter(); - SamRecordFilter filter2 = new NotPrimaryAlignmentReadFilter(); - SamRecordFilter filter3 = new DuplicateReadFilter(); - SamRecordFilter filter4 = new FailsVendorQualityCheckReadFilter(); - - List x = super.getMandatoryReadFilters(); - x.addAll(Arrays.asList(filter4, filter3, filter2, filter1)); -// } - return x; - } - /** * Returns the set of locus iterator discards that this walker wants the engine to discard automatically * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index b5dc3b868..546eb16a8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.ArrayList; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.apache.log4j.Logger; @@ -42,6 +43,7 @@ import net.sf.picard.filter.SamRecordFilter; * Time: 1:53:31 PM * To change this template use File | Settings | File Templates. 
*/ +@ReadFilters(MalformedReadFilter.class) public abstract class Walker { final protected static Logger logger = Logger.getLogger(Walker.class); @@ -135,15 +137,6 @@ public abstract class Walker { out.println("[REDUCE RESULT] Traversal result is: " + result); } - - /** - * Returns a list of SamRecordFilters that *must* be applied to the read stream for the traversal to work - * @return a list of SamRecordFilters to apply in order - */ - public List getMandatoryReadFilters() { - return new ArrayList(); // by default - } - /** * General interval reduce routine called after all of the traversals are done * @param results diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index 65a3d41fd..5812c9aec 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.sam; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; @@ -71,6 +70,11 @@ public class ArtificialReadsTraversal extends TraversalEngine extends TraversalEngine walker, - ShardDataProvider dataProvider, - T sum ) { + ShardDataProvider dataProvider, + T sum ) { if (!( walker instanceof ReadWalker )) throw new IllegalArgumentException("Walker isn't a read walker!"); @@ -99,35 +103,15 @@ public class ArtificialReadsTraversal extends TraversalEngine violations = new HashMap(); - - /** - * Add a violation to the database of violations. For now, track - * only the number of occurrrences of a given violation. - * @param violation Violation to add, generated by the SAMReadValidator. 
- */ - public void addViolation( SAMReadValidationException violation ) { - String message = violation.getMessage(); - if( !violations.containsKey( message ) ) - violations.put( message, 0L ); - violations.put(message,violations.get(message)+1); - } - - public long getViolationCount() { - long totalViolations = 0L; - Collection violationCounts = violations.values(); - for( Long violationCount: violationCounts ) - totalViolations += violationCount; - return totalViolations; - } - - public String toString() { - if( getViolationCount() == 0 ) - return ""; - - StringBuilder violationOutput = new StringBuilder(); - violationOutput.append("Eliminated malformed reads for the following reasons:\n"); - for(Map.Entry violation: violations.entrySet()) - violationOutput.append( String.format("\t%s: %d%n", violation.getKey(), violation.getValue()) ); - - return violationOutput.toString(); - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 22fb231c3..88708efe4 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -5,12 +5,13 @@ import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.executive.WindowMaker; import org.broadinstitute.sting.gatk.datasources.shards.LocusShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.utils.GenomeLoc; @@ -50,8 +51,8 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(); GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new LocusShard(Collections.singletonList(shardBounds),Collections.emptyMap()); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.emptyList())),Collections.singletonList(shardBounds),Collections.emptyMap()); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); @@ -67,7 +68,7 @@ public abstract class LocusViewTemplate extends BaseTest { GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); Shard shard = new MockLocusShard(Collections.singletonList(shardBounds)); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); @@ -82,7 +83,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new 
MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -96,7 +97,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -110,7 +111,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = 
createView(dataProvider); @@ -124,7 +125,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -138,7 +139,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -153,7 +154,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); 
WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -172,7 +173,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -191,7 +192,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -212,7 +213,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6); Shard shard = new 
MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -240,7 +241,7 @@ public abstract class LocusViewTemplate extends BaseTest { read07, read08, read09, read10, read11, read12); Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); + WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -336,11 +337,6 @@ public abstract class LocusViewTemplate extends BaseTest { backingIterator = backingList.iterator(); } - public Reads getSourceInfo() { - // There are no sources for these reads. 
- return new Reads(new ArrayList()); - } - public boolean hasNext() { return backingIterator.hasNext(); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java index 0e12516c3..ace85cece 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java @@ -25,8 +25,12 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import java.util.List; +import java.util.Collections; +import java.io.File; /** * A mock locus shard, usable for infrastructure that requires a shard to behave properly. @@ -36,6 +40,6 @@ import java.util.List; */ public class MockLocusShard extends LocusShard { public MockLocusShard(final List intervals) { - super(intervals,null); + super(new SAMDataSource(new ReadProperties(Collections.emptyList())),intervals,null); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java index 95376deeb..e1280ad8e 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import static junit.framework.Assert.fail; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; @@ -9,7 +8,7 @@ import 
org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; import org.junit.After; @@ -85,7 +84,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { // setup the data fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); - Reads reads = new Reads(fl); + ReadProperties reads = new ReadProperties(fl); // the sharding strat. SAMDataSource data = new SAMDataSource(reads); @@ -130,7 +129,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { // setup the test files fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); - Reads reads = new Reads(fl); + ReadProperties reads = new ReadProperties(fl); // the sharding strat. SAMDataSource data = new SAMDataSource(reads); @@ -172,7 +171,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { fl.clear(); fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); - reads = new Reads(fl); + reads = new ReadProperties(fl); count = 0; // the sharding strat. 
diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java index 07a4baa44..d999f52c3 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java @@ -7,7 +7,7 @@ import net.sf.samtools.SAMRecord; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; @@ -101,15 +101,6 @@ class testIterator implements StingSAMIterator { testIterator() { header = ArtificialSAMUtils.createArtificialSamHeader(1,1,2000); } - /** - * Gets source information for the reads. Contains information about the original reads - * files, plus information about downsampling, etc. 
- * - * @return - */ - public Reads getSourceInfo() { - return null; - } public void close() { diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index 195f97295..bd9ba3439 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -1,20 +1,16 @@ package org.broadinstitute.sting.gatk.iterators; import junit.framework.Assert; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -44,8 +40,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest { final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; // create a test version of the Reads object - Reads readAttributes = new Reads(new ArrayList()); - JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true); + ReadProperties readAttributes = new ReadProperties(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); SAMRecord before = 
ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); before.setReadBases(bases); @@ -96,8 +92,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest { final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; // create a test version of the Reads object - Reads readAttributes = new Reads(new ArrayList()); - JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true); + ReadProperties readAttributes = new ReadProperties(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); before.setReadBases(bases); @@ -144,8 +140,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest { records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20)); // create a test version of the Reads object - Reads reads = new Reads(new ArrayList()); - JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS); + ReadProperties reads = new ReadProperties(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS); // create the iterator by state with the fake reads and fake records li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); @@ -170,8 +166,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest { records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20)); // create a test version of the Reads object - Reads reads = new Reads(new ArrayList()); - JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS); + ReadProperties reads = new ReadProperties(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS); // create the iterator by state with the fake reads and fake 
records li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterUnitTest.java index 6b8c869c2..5a9a42cdb 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterUnitTest.java @@ -97,7 +97,7 @@ public class StingSAMIteratorAdapterUnitTest extends BaseTest { final int COUNT = 100; MyTestIterator it = new MyTestIterator(); - StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it); + StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it); int countCheck = 0; while (samIt.hasNext()) { samIt.next(); @@ -116,7 +116,7 @@ public class StingSAMIteratorAdapterUnitTest extends BaseTest { MyTestCloseableIterator it = new MyTestCloseableIterator(); - StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it); + StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it); int countCheck = 0; while (samIt.hasNext()) { @@ -133,7 +133,7 @@ public class StingSAMIteratorAdapterUnitTest extends BaseTest { MyTestCloseableIterator it = new MyTestCloseableIterator(); - StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it); + StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it); int countCheck = 0; diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index 52f125302..6448b5b3a 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -3,7 +3,8 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile; 
import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; @@ -109,7 +110,7 @@ public class TraverseReadsUnitTest extends BaseTest { ref = new IndexedFastaSequenceFile(refFile); GenomeLocParser.setupRefContigOrdering(ref); - SAMDataSource dataSource = new SAMDataSource(new Reads(bamList)); + SAMDataSource dataSource = new SAMDataSource(new ReadProperties(bamList)); ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, ref.getSequenceDictionary(), readSize); @@ -129,7 +130,7 @@ public class TraverseReadsUnitTest extends BaseTest { dataProvider.close(); } - traversalEngine.printOnTraversalDone("reads", accumulator); + traversalEngine.printOnTraversalDone(new ReadMetrics()); countReadWalker.onTraversalDone(accumulator); if (!(accumulator instanceof Integer)) {