From 1f3624d2046738a0a8f827489e49b6a8282c7477 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 7 May 2013 11:59:18 -0400 Subject: [PATCH 1/3] Base Recalibrator doesn't recalibrate all reads, so the final output line was confusing --- .../sting/gatk/walkers/bqsr/BaseRecalibrator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index dde49b7db..278317da3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -519,7 +519,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche generatePlots(); } - logger.info("Processed: " + result + " reads"); + logger.info("BaseRecalibrator was able to recalibrate " + result + " reads"); } private RecalibrationTables getRecalibrationTable() { From 58f4b8122221e052143e4d0e4771bd0a52995c17 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 7 May 2013 12:23:24 -0400 Subject: [PATCH 2/3] Count Reads should use a Long instead of an Integer for counts to prevent overflows. Added unit test. --- .../sting/gatk/walkers/qc/CountReads.java | 11 +++-- .../traversals/TraverseReadsUnitTest.java | 8 +-- .../gatk/walkers/qc/CountReadsUnitTest.java | 49 +++++++++++++++++++ 3 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java index 825fcac90..45beea28f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java @@ -66,11 +66,16 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountReads extends ReadWalker implements NanoSchedulable { +public class CountReads extends ReadWalker implements NanoSchedulable { public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) { return 1; } - @Override public Integer reduceInit() { return 0; } - @Override public Integer reduce(Integer value, Integer sum) { return value + sum; } + @Override public Long reduceInit() { return 0L; } + + public Long reduce(Integer value, Long sum) { return (long) value + sum; } + + public void onTraversalDone(Long result) { + logger.info("CountReads counted " + result + " reads in the traversal"); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index 8bc373fe8..e8840c39f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -153,11 +153,11 @@ public class TraverseReadsUnitTest extends BaseTest { countReadWalker.onTraversalDone(accumulator); - if (!(accumulator instanceof Integer)) { - fail("Count read walker should return an interger."); + if (!(accumulator instanceof Long)) { + fail("Count read walker should return a Long."); } - if (((Integer) accumulator) != 10000) { - fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator)); + if (!accumulator.equals(new Long(10000))) { + fail("there should be 10000 mapped reads in the index file, there was " + (accumulator)); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java new file mode 100644 index 000000000..cf115cc76 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.testng.Assert; +import org.testng.annotations.Test; + +public class CountReadsUnitTest { + + @Test + public void testReadsDoNotOverflowInt() { + + final CountReads walker = new CountReads(); + + final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L; + + Long sum = walker.reduceInit(); + + for ( long i = 0L; i < moreThanMaxInt; i++ ) { + final Integer x = walker.map(null, null, null); + sum = walker.reduce(x, sum); + } + + Assert.assertEquals(sum.longValue(), moreThanMaxInt); + } +} From 20c7a8903020900b1dbc9af0c7e1877118bbe764 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 7 May 2013 13:53:43 -0400 Subject: [PATCH 3/3] Fixes to get accurate read counts for Read traversals 1. Don't clone the dataSource's metrics object (because then the engine won't continue to get updated counts) 2. Use the dataSource's metrics object in the CountingFilteringIterator and not the first shard's object! 3. Synchronize ReadMetrics.incrementMetrics to prevent race conditions. Also: * Make sure users realize that the read counts are approximate in the print outs. * Removed a lot of unused cruft from the metrics object while I was in there. * Added test to make sure that the ReadMetrics read count does not overflow ints. * Added unit tests for traversal metrics (reads, loci, and active region traversals); these test counts of reads and records. --- .../sting/gatk/ReadMetrics.java | 135 +------- .../providers/LocusReferenceView.java | 4 +- .../gatk/datasources/reads/SAMDataSource.java | 14 +- .../sting/gatk/executive/MicroScheduler.java | 3 +- .../filters/CountingFilteringIterator.java | 65 ++-- .../gatk/traversals/TraversalEngine.java | 9 - .../traversals/TraverseActiveRegions.java | 2 - .../gatk/traversals/TraverseDuplicates.java | 1 - .../gatk/traversals/TraverseLociNano.java | 1 - .../gatk/traversals/TraverseReadPairs.java | 1 - .../gatk/traversals/TraverseReadsNano.java | 2 - .../sting/gatk/ReadMetricsUnitTest.java | 321 ++++++++++++++++++ .../gatk/walkers/qc/CountReadsUnitTest.java | 4 +- 13 files changed, 384 insertions(+), 178 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java index aadb57985..f73e7ccd5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -40,37 +40,27 @@ public class ReadMetrics implements Cloneable { private long nRecords; // How many reads have we processed, along with those skipped for various reasons private long nReads; - private long nSkippedReads; - private long nUnmappedReads; - private long nNotPrimary; - private long nBadAlignments; - private long nSkippedIndels; - private long nDuplicates; - private Map counter = new HashMap(); + + // keep track of filtered records by filter type (class) + private Map filterCounter = new HashMap<>(); /** * Combines these metrics with a set of other metrics, storing the results in this class. * @param metrics The metrics to fold into this class. */ - public void incrementMetrics(ReadMetrics metrics) { + public synchronized void incrementMetrics(ReadMetrics metrics) { nRecords += metrics.nRecords; nReads += metrics.nReads; - nSkippedReads += metrics.nSkippedReads; - nUnmappedReads += metrics.nUnmappedReads; - nNotPrimary += metrics.nNotPrimary; - nBadAlignments += metrics.nBadAlignments; - nSkippedIndels += metrics.nSkippedIndels; - nDuplicates += metrics.nDuplicates; - for(Map.Entry counterEntry: metrics.counter.entrySet()) { + for(Map.Entry counterEntry: metrics.filterCounter.entrySet()) { Class counterType = counterEntry.getKey(); - long newValue = (counter.containsKey(counterType) ? counter.get(counterType) : 0) + counterEntry.getValue(); - counter.put(counterType,newValue); + long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue(); + filterCounter.put(counterType, newValue); } } /** * Create a copy of the given read metrics. - * @return + * @return a non-null clone */ public ReadMetrics clone() { ReadMetrics newMetrics; @@ -82,13 +72,7 @@ public class ReadMetrics implements Cloneable { } newMetrics.nRecords = nRecords; newMetrics.nReads = nReads; - newMetrics.nSkippedReads = nSkippedReads; - newMetrics.nUnmappedReads = nUnmappedReads; - newMetrics.nNotPrimary = nNotPrimary; - newMetrics.nBadAlignments = nBadAlignments; - newMetrics.nSkippedIndels = nSkippedIndels; - newMetrics.nDuplicates = nDuplicates; - newMetrics.counter = new HashMap(counter); + newMetrics.filterCounter = new HashMap<>(filterCounter); return newMetrics; } @@ -96,16 +80,16 @@ public class ReadMetrics implements Cloneable { public void incrementFilter(SamRecordFilter filter) { long c = 0; - if ( counter.containsKey(filter.getClass()) ) { - c = counter.get(filter.getClass()); + if ( filterCounter.containsKey(filter.getClass()) ) { + c = filterCounter.get(filter.getClass()); } - counter.put(filter.getClass(), c + 1L); + filterCounter.put(filter.getClass(), c + 1L); } public Map getCountsByFilter() { - final TreeMap sortedCounts = new TreeMap(); - for(Map.Entry counterEntry: counter.entrySet()) { + final TreeMap sortedCounts = new TreeMap<>(); + for(Map.Entry counterEntry: filterCounter.entrySet()) { sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue()); } return sortedCounts; @@ -143,95 +127,4 @@ public class ReadMetrics implements Cloneable { public void incrementNumReadsSeen() { nReads++; } - - /** - * Gets the cumulative number of reads skipped in the course of this run. - * @return Cumulative number of reads skipped in the course of this run. - */ - public long getNumSkippedReads() { - return nSkippedReads; - } - - /** - * Increments the cumulative number of reads skipped in the course of this run. - */ - public void incrementNumSkippedReads() { - nSkippedReads++; - } - - /** - * Gets the number of unmapped reads skipped in the course of this run. - * @return The number of unmapped reads skipped. - */ - public long getNumUnmappedReads() { - return nUnmappedReads; - } - - /** - * Increments the number of unmapped reads skipped in the course of this run. - */ - public void incrementNumUnmappedReads() { - nUnmappedReads++; - } - - /** - * - * @return - */ - public long getNumNonPrimaryReads() { - return nNotPrimary; - } - - /** - * - */ - public void incrementNumNonPrimaryReads() { - nNotPrimary++; - } - - /** - * - * @return - */ - public long getNumBadAlignments() { - return nBadAlignments; - } - - /** - * - */ - public void incrementNumBadAlignments() { - nBadAlignments++; - } - - /** - * - * @return - */ - public long getNumSkippedIndels() { - return nSkippedIndels; - } - - /** - * - */ - public void incrementNumSkippedIndels() { - nSkippedIndels++; - } - - /** - * - * @return - */ - public long getNumDuplicates() { - return nDuplicates; - } - - /** - * - */ - public void incrementNumDuplicates() { - nDuplicates++; - } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index d5b7d0487..b5efbc693 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -176,13 +176,13 @@ public class LocusReferenceView extends ReferenceView { /** * Gets the reference context associated with this particular point or extended interval on the genome. - * @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beoynd current bounds, it will be trimmed down. + * @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beyond current bounds, it will be trimmed down. * @return The base at the position represented by this genomeLoc. */ public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) { //validateLocation( genomeLoc ); - GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), bounds.getContigIndex(), + GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), genomeLoc.getContigIndex(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); int refStart = -1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 1223dd2af..bf25582ab 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -440,9 +440,8 @@ public class SAMDataSource { * @return Cumulative read metrics. */ public ReadMetrics getCumulativeReadMetrics() { - synchronized(readMetrics) { - return readMetrics.clone(); - } + // don't return a clone here because the engine uses a pointer to this object + return readMetrics; } /** @@ -450,9 +449,7 @@ public class SAMDataSource { * @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics. */ public void incorporateReadMetrics(final ReadMetrics readMetrics) { - synchronized(this.readMetrics) { - this.readMetrics.incrementMetrics(readMetrics); - } + this.readMetrics.incrementMetrics(readMetrics); } public StingSAMIterator seek(Shard shard) { @@ -548,7 +545,10 @@ public class SAMDataSource { MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap); - return applyDecoratingIterators(shard.getReadMetrics(), + // The readMetrics object being passed in should be that of this dataSource and NOT the shard: the dataSource's + // metrics is intended to keep track of the reads seen (and hence passed to the CountingFilteringIterator when + // we apply the decorators), whereas the shard's metrics is used to keep track the "records" seen. + return applyDecoratingIterators(readMetrics, enableVerification, readProperties.useOriginalBaseQualities(), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 4ffdc88d8..7077db49c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -52,7 +52,6 @@ import javax.management.ObjectName; import java.io.File; import java.lang.management.ManagementFactory; import java.util.*; -import java.util.concurrent.TimeUnit; /** @@ -368,7 +367,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values()) nSkippedReads += countsByFilter; - logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)", + logger.info(String.format("%d reads were filtered out during the traversal out of approximately %d total reads (%.2f%%)", nSkippedReads, cumulativeMetrics.getNumReadsSeen(), 100.0 * MathUtils.ratio(nSkippedReads, cumulativeMetrics.getNumReadsSeen()))); diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index 3e50632d9..6c926e3cf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -1,28 +1,28 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; @@ -41,7 +41,8 @@ import java.util.NoSuchElementException; * @author Mark DePristo */ public class CountingFilteringIterator implements CloseableIterator { - private final ReadMetrics runtimeMetrics; + private final ReadMetrics globalRuntimeMetrics; + private final ReadMetrics privateRuntimeMetrics; private final Iterator iterator; private final Collection filters; private SAMRecord next = null; @@ -54,7 +55,8 @@ public class CountingFilteringIterator implements CloseableIterator { * @param filters the filter (which may be a FilterAggregator) */ public CountingFilteringIterator(ReadMetrics metrics, Iterator iterator, Collection filters) { - this.runtimeMetrics = metrics; + this.globalRuntimeMetrics = metrics; + privateRuntimeMetrics = new ReadMetrics(); this.iterator = iterator; this.filters = filters; next = getNextRecord(); @@ -95,6 +97,8 @@ public class CountingFilteringIterator implements CloseableIterator { public void close() { CloserUtil.close(iterator); + // update the global metrics with all the data we collected here + globalRuntimeMetrics.incrementMetrics(privateRuntimeMetrics); } /** @@ -105,12 +109,15 @@ public class CountingFilteringIterator implements CloseableIterator { private SAMRecord getNextRecord() { while (iterator.hasNext()) { SAMRecord record = iterator.next(); - runtimeMetrics.incrementNumReadsSeen(); + + // update only the private copy of the metrics so that we don't need to worry about race conditions + // that can arise when trying to update the global copy; it was agreed that this is the cleanest solution. + privateRuntimeMetrics.incrementNumReadsSeen(); boolean filtered = false; for(SamRecordFilter filter: filters) { if(filter.filterOut(record)) { - runtimeMetrics.incrementFilter(filter); + privateRuntimeMetrics.incrementFilter(filter); filtered = true; break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 0811e5e70..529b3ef17 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -100,15 +100,6 @@ public abstract class TraversalEngine,Provide // by default there's nothing to do } - /** - * Update the cumulative traversal metrics according to the data in this shard - * - * @param shard a non-null shard - */ - public void updateCumulativeMetrics(final Shard shard) { - updateCumulativeMetrics(shard.getReadMetrics()); - } - /** * Update the cumulative traversal metrics according to the data in this shard * diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index b1e5b907f..cac93cb07 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -259,8 +259,6 @@ public final class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine, final TraverseResults result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum ); sum = result.reduceResult; dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations); - updateCumulativeMetrics(dataProvider.getShard()); } // We have a final map call to execute here to clean up the skipped based from the diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index aed88509e..764011a48 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -90,7 +90,6 @@ public class TraverseReadPairs extends TraversalEngine extends TraversalEngine, final Iterator aggregatedInputs = aggregateMapData(dataProvider); final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce); - updateCumulativeMetrics(dataProvider.getShard()); - return result; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java new file mode 100644 index 000000000..32fd35d95 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java @@ -0,0 +1,321 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.*; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.reads.*; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.executive.WindowMaker; +import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; +import org.broadinstitute.sting.gatk.traversals.*; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.sam.*; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.*; + +public class ReadMetricsUnitTest extends BaseTest { + + @Test + public void testReadsSeenDoNotOverflowInt() { + + final ReadMetrics metrics = new ReadMetrics(); + + final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L; + + for ( long i = 0L; i < moreThanMaxInt; i++ ) { + metrics.incrementNumReadsSeen(); + } + + Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt); + Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE); + + logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE)); + } + + + // Test the accuracy of the read metrics + + private IndexedFastaSequenceFile reference; + private SAMSequenceDictionary dictionary; + private SAMFileHeader header; + private GATKSAMReadGroupRecord readGroup; + private GenomeLocParser genomeLocParser; + private File testBAM; + + private static final int numReadsPerContig = 250000; + private static final List contigs = Arrays.asList("1", "2", "3"); + + @BeforeClass + private void init() throws IOException { + reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + dictionary = reference.getSequenceDictionary(); + genomeLocParser = new GenomeLocParser(dictionary); + header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); + header.setSequenceDictionary(dictionary); + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test")); + + final List reads = new ArrayList<>(); + for ( final String contig : contigs ) { + for ( int i = 1; i <= numReadsPerContig; i++ ) { + reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i)); + } + } + + createBAM(reads); + } + + private void createBAM(final List reads) throws IOException { + testBAM = File.createTempFile("TraverseActiveRegionsUnitTest", ".bam"); + testBAM.deleteOnExit(); + + SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, testBAM); + for (GATKSAMRecord read : reads ) { + out.addAlignment(read); + } + out.close(); + + new File(testBAM.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit(); + new File(testBAM.getAbsolutePath() + ".bai").deleteOnExit(); + } + + // copied from LocusViewTemplate + protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) { + GATKSAMRecord record = new GATKSAMRecord(header); + + record.setReadName(readName); + record.setReferenceIndex(dictionary.getSequenceIndex(contig)); + record.setAlignmentStart(alignmentStart); + + record.setCigarString("1M"); + record.setReadString("A"); + record.setBaseQualityString("A"); + record.setReadGroup(readGroup); + + return record; + } + + @Test + public void testCountsFromReadTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + + final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1); + final DummyReadWalker walker = new DummyReadWalker(); + traverseReadsNano.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) { + final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList()); + traverseReadsNano.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + @Test + public void testCountsFromLocusTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); + + final TraverseLociNano traverseLociNano = new TraverseLociNano(1); + final DummyLocusWalker walker = new DummyLocusWalker(); + traverseLociNano.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer()) ) { + final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples); + for ( WindowMaker.WindowMakerIterator window : windowMaker ) { + final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + traverseLociNano.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + windowMaker.close(); + } + + //dataSource.close(); + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + @Test + public void testCountsFromActiveRegionTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); + + final List intervals = new ArrayList<>(contigs.size()); + for ( final String contig : contigs ) + intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig)); + + final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions(); + final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + traverseActiveRegions.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer()) ) { + final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples); + for ( WindowMaker.WindowMakerIterator window : windowMaker ) { + final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + traverseActiveRegions.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + windowMaker.close(); + } + traverseActiveRegions.endTraversal(walker, 0); + + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + class DummyLocusWalker extends LocusWalker { + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } + + class DummyReadWalker extends ReadWalker { + @Override + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } + + class DummyActiveRegionWalker extends ActiveRegionWalker { + @Override + public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return new ActivityProfileState(ref.getLocus(), 0.0); + } + + @Override + public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java index cf115cc76..8f5541c41 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java @@ -25,10 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.Test; -public class CountReadsUnitTest { +public class CountReadsUnitTest extends BaseTest { @Test public void testReadsDoNotOverflowInt() { @@ -45,5 +46,6 @@ public class CountReadsUnitTest { } Assert.assertEquals(sum.longValue(), moreThanMaxInt); + Assert.assertTrue(sum.longValue() > (long) Integer.MAX_VALUE); } }