diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index dde49b7db..278317da3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -519,7 +519,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche generatePlots(); } - logger.info("Processed: " + result + " reads"); + logger.info("BaseRecalibrator was able to recalibrate " + result + " reads"); } private RecalibrationTables getRecalibrationTable() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java index aadb57985..f73e7ccd5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -40,37 +40,27 @@ public class ReadMetrics implements Cloneable { private long nRecords; // How many reads have we processed, along with those skipped for various reasons private long nReads; - private long nSkippedReads; - private long nUnmappedReads; - private long nNotPrimary; - private long nBadAlignments; - private long nSkippedIndels; - private long nDuplicates; - private Map counter = new HashMap(); + + // keep track of filtered records by filter type (class) + private Map filterCounter = new HashMap<>(); /** * Combines these metrics with a set of other metrics, storing the results in this class. * @param metrics The metrics to fold into this class. */ - public void incrementMetrics(ReadMetrics metrics) { + public synchronized void incrementMetrics(ReadMetrics metrics) { nRecords += metrics.nRecords; nReads += metrics.nReads; - nSkippedReads += metrics.nSkippedReads; - nUnmappedReads += metrics.nUnmappedReads; - nNotPrimary += metrics.nNotPrimary; - nBadAlignments += metrics.nBadAlignments; - nSkippedIndels += metrics.nSkippedIndels; - nDuplicates += metrics.nDuplicates; - for(Map.Entry counterEntry: metrics.counter.entrySet()) { + for(Map.Entry counterEntry: metrics.filterCounter.entrySet()) { Class counterType = counterEntry.getKey(); - long newValue = (counter.containsKey(counterType) ? counter.get(counterType) : 0) + counterEntry.getValue(); - counter.put(counterType,newValue); + long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue(); + filterCounter.put(counterType, newValue); } } /** * Create a copy of the given read metrics. - * @return + * @return a non-null clone */ public ReadMetrics clone() { ReadMetrics newMetrics; @@ -82,13 +72,7 @@ public class ReadMetrics implements Cloneable { } newMetrics.nRecords = nRecords; newMetrics.nReads = nReads; - newMetrics.nSkippedReads = nSkippedReads; - newMetrics.nUnmappedReads = nUnmappedReads; - newMetrics.nNotPrimary = nNotPrimary; - newMetrics.nBadAlignments = nBadAlignments; - newMetrics.nSkippedIndels = nSkippedIndels; - newMetrics.nDuplicates = nDuplicates; - newMetrics.counter = new HashMap(counter); + newMetrics.filterCounter = new HashMap<>(filterCounter); return newMetrics; } @@ -96,16 +80,16 @@ public class ReadMetrics implements Cloneable { public void incrementFilter(SamRecordFilter filter) { long c = 0; - if ( counter.containsKey(filter.getClass()) ) { - c = counter.get(filter.getClass()); + if ( filterCounter.containsKey(filter.getClass()) ) { + c = filterCounter.get(filter.getClass()); } - counter.put(filter.getClass(), c + 1L); + filterCounter.put(filter.getClass(), c + 1L); } public Map getCountsByFilter() { - final TreeMap sortedCounts = new TreeMap(); - for(Map.Entry counterEntry: counter.entrySet()) { + final TreeMap sortedCounts = new TreeMap<>(); + for(Map.Entry counterEntry: filterCounter.entrySet()) { sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue()); } return sortedCounts; @@ -143,95 +127,4 @@ public class ReadMetrics implements Cloneable { public void incrementNumReadsSeen() { nReads++; } - - /** - * Gets the cumulative number of reads skipped in the course of this run. - * @return Cumulative number of reads skipped in the course of this run. - */ - public long getNumSkippedReads() { - return nSkippedReads; - } - - /** - * Increments the cumulative number of reads skipped in the course of this run. - */ - public void incrementNumSkippedReads() { - nSkippedReads++; - } - - /** - * Gets the number of unmapped reads skipped in the course of this run. - * @return The number of unmapped reads skipped. - */ - public long getNumUnmappedReads() { - return nUnmappedReads; - } - - /** - * Increments the number of unmapped reads skipped in the course of this run. - */ - public void incrementNumUnmappedReads() { - nUnmappedReads++; - } - - /** - * - * @return - */ - public long getNumNonPrimaryReads() { - return nNotPrimary; - } - - /** - * - */ - public void incrementNumNonPrimaryReads() { - nNotPrimary++; - } - - /** - * - * @return - */ - public long getNumBadAlignments() { - return nBadAlignments; - } - - /** - * - */ - public void incrementNumBadAlignments() { - nBadAlignments++; - } - - /** - * - * @return - */ - public long getNumSkippedIndels() { - return nSkippedIndels; - } - - /** - * - */ - public void incrementNumSkippedIndels() { - nSkippedIndels++; - } - - /** - * - * @return - */ - public long getNumDuplicates() { - return nDuplicates; - } - - /** - * - */ - public void incrementNumDuplicates() { - nDuplicates++; - } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index d5b7d0487..b5efbc693 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -176,13 +176,13 @@ public class LocusReferenceView extends ReferenceView { /** * Gets the reference context associated with this particular point or extended interval on the genome. - * @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beoynd current bounds, it will be trimmed down. + * @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beyond current bounds, it will be trimmed down. * @return The base at the position represented by this genomeLoc. */ public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) { //validateLocation( genomeLoc ); - GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), bounds.getContigIndex(), + GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), genomeLoc.getContigIndex(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); int refStart = -1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 1223dd2af..bf25582ab 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -440,9 +440,8 @@ public class SAMDataSource { * @return Cumulative read metrics. */ public ReadMetrics getCumulativeReadMetrics() { - synchronized(readMetrics) { - return readMetrics.clone(); - } + // don't return a clone here because the engine uses a pointer to this object + return readMetrics; } /** @@ -450,9 +449,7 @@ public class SAMDataSource { * @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics. */ public void incorporateReadMetrics(final ReadMetrics readMetrics) { - synchronized(this.readMetrics) { - this.readMetrics.incrementMetrics(readMetrics); - } + this.readMetrics.incrementMetrics(readMetrics); } public StingSAMIterator seek(Shard shard) { @@ -548,7 +545,10 @@ public class SAMDataSource { MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap); - return applyDecoratingIterators(shard.getReadMetrics(), + // The readMetrics object being passed in should be that of this dataSource and NOT the shard: the dataSource's + // metrics is intended to keep track of the reads seen (and hence passed to the CountingFilteringIterator when + // we apply the decorators), whereas the shard's metrics is used to keep track the "records" seen. + return applyDecoratingIterators(readMetrics, enableVerification, readProperties.useOriginalBaseQualities(), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 4ffdc88d8..7077db49c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -52,7 +52,6 @@ import javax.management.ObjectName; import java.io.File; import java.lang.management.ManagementFactory; import java.util.*; -import java.util.concurrent.TimeUnit; /** @@ -368,7 +367,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values()) nSkippedReads += countsByFilter; - logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)", + logger.info(String.format("%d reads were filtered out during the traversal out of approximately %d total reads (%.2f%%)", nSkippedReads, cumulativeMetrics.getNumReadsSeen(), 100.0 * MathUtils.ratio(nSkippedReads, cumulativeMetrics.getNumReadsSeen()))); diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index 3e50632d9..6c926e3cf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -1,28 +1,28 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; @@ -41,7 +41,8 @@ import java.util.NoSuchElementException; * @author Mark DePristo */ public class CountingFilteringIterator implements CloseableIterator { - private final ReadMetrics runtimeMetrics; + private final ReadMetrics globalRuntimeMetrics; + private final ReadMetrics privateRuntimeMetrics; private final Iterator iterator; private final Collection filters; private SAMRecord next = null; @@ -54,7 +55,8 @@ public class CountingFilteringIterator implements CloseableIterator { * @param filters the filter (which may be a FilterAggregator) */ public CountingFilteringIterator(ReadMetrics metrics, Iterator iterator, Collection filters) { - this.runtimeMetrics = metrics; + this.globalRuntimeMetrics = metrics; + privateRuntimeMetrics = new ReadMetrics(); this.iterator = iterator; this.filters = filters; next = getNextRecord(); @@ -95,6 +97,8 @@ public class CountingFilteringIterator implements CloseableIterator { public void close() { CloserUtil.close(iterator); + // update the global metrics with all the data we collected here + globalRuntimeMetrics.incrementMetrics(privateRuntimeMetrics); } /** @@ -105,12 +109,15 @@ public class CountingFilteringIterator implements CloseableIterator { private SAMRecord getNextRecord() { while (iterator.hasNext()) { SAMRecord record = iterator.next(); - runtimeMetrics.incrementNumReadsSeen(); + + // update only the private copy of the metrics so that we don't need to worry about race conditions + // that can arise when trying to update the global copy; it was agreed that this is the cleanest solution. + privateRuntimeMetrics.incrementNumReadsSeen(); boolean filtered = false; for(SamRecordFilter filter: filters) { if(filter.filterOut(record)) { - runtimeMetrics.incrementFilter(filter); + privateRuntimeMetrics.incrementFilter(filter); filtered = true; break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 0811e5e70..529b3ef17 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -100,15 +100,6 @@ public abstract class TraversalEngine,Provide // by default there's nothing to do } - /** - * Update the cumulative traversal metrics according to the data in this shard - * - * @param shard a non-null shard - */ - public void updateCumulativeMetrics(final Shard shard) { - updateCumulativeMetrics(shard.getReadMetrics()); - } - /** * Update the cumulative traversal metrics according to the data in this shard * diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index b1e5b907f..cac93cb07 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -259,8 +259,6 @@ public final class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine, final TraverseResults result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum ); sum = result.reduceResult; dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations); - updateCumulativeMetrics(dataProvider.getShard()); } // We have a final map call to execute here to clean up the skipped based from the diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index aed88509e..764011a48 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -90,7 +90,6 @@ public class TraverseReadPairs extends TraversalEngine extends TraversalEngine, final Iterator aggregatedInputs = aggregateMapData(dataProvider); final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce); - updateCumulativeMetrics(dataProvider.getShard()); - return result; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java index 825fcac90..45beea28f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java @@ -66,11 +66,16 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountReads extends ReadWalker implements NanoSchedulable { +public class CountReads extends ReadWalker implements NanoSchedulable { public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) { return 1; } - @Override public Integer reduceInit() { return 0; } - @Override public Integer reduce(Integer value, Integer sum) { return value + sum; } + @Override public Long reduceInit() { return 0L; } + + public Long reduce(Integer value, Long sum) { return (long) value + sum; } + + public void onTraversalDone(Long result) { + logger.info("CountReads counted " + result + " reads in the traversal"); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java new file mode 100644 index 000000000..32fd35d95 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java @@ -0,0 +1,321 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.*; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.reads.*; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.executive.WindowMaker; +import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; +import org.broadinstitute.sting.gatk.traversals.*; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.sam.*; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.*; + +public class ReadMetricsUnitTest extends BaseTest { + + @Test + public void testReadsSeenDoNotOverflowInt() { + + final ReadMetrics metrics = new ReadMetrics(); + + final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L; + + for ( long i = 0L; i < moreThanMaxInt; i++ ) { + metrics.incrementNumReadsSeen(); + } + + Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt); + Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE); + + logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE)); + } + + + // Test the accuracy of the read metrics + + private IndexedFastaSequenceFile reference; + private SAMSequenceDictionary dictionary; + private SAMFileHeader header; + private GATKSAMReadGroupRecord readGroup; + private GenomeLocParser genomeLocParser; + private File testBAM; + + private static final int numReadsPerContig = 250000; + private static final List contigs = Arrays.asList("1", "2", "3"); + + @BeforeClass + private void init() throws IOException { + reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + dictionary = reference.getSequenceDictionary(); + genomeLocParser = new GenomeLocParser(dictionary); + header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); + header.setSequenceDictionary(dictionary); + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test")); + + final List reads = new ArrayList<>(); + for ( final String contig : contigs ) { + for ( int i = 1; i <= numReadsPerContig; i++ ) { + reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i)); + } + } + + createBAM(reads); + } + + private void createBAM(final List reads) throws IOException { + testBAM = File.createTempFile("TraverseActiveRegionsUnitTest", ".bam"); + testBAM.deleteOnExit(); + + SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, testBAM); + for (GATKSAMRecord read : reads ) { + out.addAlignment(read); + } + out.close(); + + new File(testBAM.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit(); + new File(testBAM.getAbsolutePath() + ".bai").deleteOnExit(); + } + + // copied from LocusViewTemplate + protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) { + GATKSAMRecord record = new GATKSAMRecord(header); + + record.setReadName(readName); + record.setReferenceIndex(dictionary.getSequenceIndex(contig)); + record.setAlignmentStart(alignmentStart); + + record.setCigarString("1M"); + record.setReadString("A"); + record.setBaseQualityString("A"); + record.setReadGroup(readGroup); + + return record; + } + + @Test + public void testCountsFromReadTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + + final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1); + final DummyReadWalker walker = new DummyReadWalker(); + traverseReadsNano.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) { + final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList()); + traverseReadsNano.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + @Test + public void testCountsFromLocusTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); + + final TraverseLociNano traverseLociNano = new TraverseLociNano(1); + final DummyLocusWalker walker = new DummyLocusWalker(); + traverseLociNano.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer()) ) { + final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples); + for ( WindowMaker.WindowMakerIterator window : windowMaker ) { + final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + traverseLociNano.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + windowMaker.close(); + } + + //dataSource.close(); + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + @Test + public void testCountsFromActiveRegionTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); + + final List intervals = new ArrayList<>(contigs.size()); + for ( final String contig : contigs ) + intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig)); + + final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions(); + final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + traverseActiveRegions.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer()) ) { + final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples); + for ( WindowMaker.WindowMakerIterator window : windowMaker ) { + final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + traverseActiveRegions.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + windowMaker.close(); + } + traverseActiveRegions.endTraversal(walker, 0); + + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + class DummyLocusWalker extends LocusWalker { + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } + + class DummyReadWalker extends ReadWalker { + @Override + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } + + class DummyActiveRegionWalker extends ActiveRegionWalker { + @Override + public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return new ActivityProfileState(ref.getLocus(), 0.0); + } + + @Override + public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index 8bc373fe8..e8840c39f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -153,11 +153,11 @@ public class TraverseReadsUnitTest extends BaseTest { countReadWalker.onTraversalDone(accumulator); - if (!(accumulator instanceof Integer)) { - fail("Count read walker should return an interger."); + if (!(accumulator instanceof Long)) { + fail("Count read walker should return a Long."); } - if (((Integer) accumulator) != 10000) { - fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator)); + if (!accumulator.equals(new Long(10000))) { + fail("there should be 10000 mapped reads in the index file, there was " + (accumulator)); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java new file mode 100644 index 000000000..8f5541c41 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java @@ -0,0 +1,51 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class CountReadsUnitTest extends BaseTest { + + @Test + public void testReadsDoNotOverflowInt() { + + final CountReads walker = new CountReads(); + + final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L; + + Long sum = walker.reduceInit(); + + for ( long i = 0L; i < moreThanMaxInt; i++ ) { + final Integer x = walker.map(null, null, null); + sum = walker.reduce(x, sum); + } + + Assert.assertEquals(sum.longValue(), moreThanMaxInt); + Assert.assertTrue(sum.longValue() > (long) Integer.MAX_VALUE); + } +}