Fixes to get accurate read counts for Read traversals

 1. Don't clone the dataSource's metrics object (otherwise the engine no longer receives updated counts).
 2. Use the dataSource's metrics object in the CountingFilteringIterator, not the first shard's object.
 3. Synchronize ReadMetrics.incrementMetrics to prevent race conditions (see the sketch below, before the file diffs).

Also:
 * Make clear in the printouts that the read counts are approximate.
 * Remove a lot of unused cruft from the metrics object along the way.
 * Add a test to make sure that the ReadMetrics read count does not overflow ints.
 * Add unit tests for traversal metrics (read, locus, and active region traversals) that verify the counts of reads and records.
Eric Banks 2013-05-07 13:53:43 -04:00
parent 58f4b81222
commit 20c7a89030
13 changed files with 384 additions and 178 deletions
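The counting scheme introduced by this commit, reduced to a minimal standalone sketch. The class and method names below (SharedMetrics, CountingIteratorSketch, onRecord) are simplified stand-ins rather than the real GATK API: each iterator counts into its own private metrics object and folds it into the data source's shared object exactly once on close(), through a synchronized merge.

import java.util.HashMap;
import java.util.Map;

// Simplified stand-ins for ReadMetrics and CountingFilteringIterator; the real
// classes carry more state and wrap an actual SAMRecord iterator plus the read filters.
class SharedMetrics {
    private long nReads;
    private final Map<String, Long> filterCounter = new HashMap<>();

    // Fix 3: the merge is synchronized so that concurrent iterators can fold their
    // private counts into the data source's shared object without racing each other.
    public synchronized void incrementMetrics(final SharedMetrics other) {
        nReads += other.nReads;
        for (final Map.Entry<String, Long> entry : other.filterCounter.entrySet()) {
            filterCounter.merge(entry.getKey(), entry.getValue(), Long::sum);
        }
    }

    public void incrementNumReadsSeen() {
        nReads++;
    }

    public void incrementFilter(final String filterName) {
        filterCounter.merge(filterName, 1L, Long::sum);
    }

    public synchronized long getNumReadsSeen() {
        return nReads;
    }
}

class CountingIteratorSketch implements AutoCloseable {
    private final SharedMetrics globalMetrics;                        // the data source's live object (fixes 1 and 2)
    private final SharedMetrics privateMetrics = new SharedMetrics(); // per-iterator copy, updated without locks

    CountingIteratorSketch(final SharedMetrics dataSourceMetrics) {
        // Not a clone, and not the first shard's object: counts must reach the engine.
        this.globalMetrics = dataSourceMetrics;
    }

    // Called once per record pulled from the underlying iterator.
    void onRecord(final boolean filteredOut, final String filterName) {
        privateMetrics.incrementNumReadsSeen();
        if (filteredOut) {
            privateMetrics.incrementFilter(filterName);
        }
    }

    @Override
    public void close() {
        // Fold the private counts into the shared object exactly once, at the end of the shard.
        globalMetrics.incrementMetrics(privateMetrics);
    }
}

In the commit itself this corresponds to SAMDataSource.getCumulativeReadMetrics() handing out its live ReadMetrics object and CountingFilteringIterator.close() performing the merge.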

View File

@@ -40,37 +40,27 @@ public class ReadMetrics implements Cloneable {
private long nRecords;
// How many reads have we processed, along with those skipped for various reasons
private long nReads;
private long nSkippedReads;
private long nUnmappedReads;
private long nNotPrimary;
private long nBadAlignments;
private long nSkippedIndels;
private long nDuplicates;
private Map<Class, Long> counter = new HashMap<Class, Long>();
// keep track of filtered records by filter type (class)
private Map<Class, Long> filterCounter = new HashMap<>();
/**
* Combines these metrics with a set of other metrics, storing the results in this class.
* @param metrics The metrics to fold into this class.
*/
public void incrementMetrics(ReadMetrics metrics) {
public synchronized void incrementMetrics(ReadMetrics metrics) {
nRecords += metrics.nRecords;
nReads += metrics.nReads;
nSkippedReads += metrics.nSkippedReads;
nUnmappedReads += metrics.nUnmappedReads;
nNotPrimary += metrics.nNotPrimary;
nBadAlignments += metrics.nBadAlignments;
nSkippedIndels += metrics.nSkippedIndels;
nDuplicates += metrics.nDuplicates;
for(Map.Entry<Class,Long> counterEntry: metrics.counter.entrySet()) {
for(Map.Entry<Class,Long> counterEntry: metrics.filterCounter.entrySet()) {
Class counterType = counterEntry.getKey();
long newValue = (counter.containsKey(counterType) ? counter.get(counterType) : 0) + counterEntry.getValue();
counter.put(counterType,newValue);
long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue();
filterCounter.put(counterType, newValue);
}
}
/**
* Create a copy of the given read metrics.
* @return
* @return a non-null clone
*/
public ReadMetrics clone() {
ReadMetrics newMetrics;
@@ -82,13 +72,7 @@ public class ReadMetrics implements Cloneable {
}
newMetrics.nRecords = nRecords;
newMetrics.nReads = nReads;
newMetrics.nSkippedReads = nSkippedReads;
newMetrics.nUnmappedReads = nUnmappedReads;
newMetrics.nNotPrimary = nNotPrimary;
newMetrics.nBadAlignments = nBadAlignments;
newMetrics.nSkippedIndels = nSkippedIndels;
newMetrics.nDuplicates = nDuplicates;
newMetrics.counter = new HashMap<Class,Long>(counter);
newMetrics.filterCounter = new HashMap<>(filterCounter);
return newMetrics;
}
@@ -96,16 +80,16 @@ public class ReadMetrics implements Cloneable {
public void incrementFilter(SamRecordFilter filter) {
long c = 0;
if ( counter.containsKey(filter.getClass()) ) {
c = counter.get(filter.getClass());
if ( filterCounter.containsKey(filter.getClass()) ) {
c = filterCounter.get(filter.getClass());
}
counter.put(filter.getClass(), c + 1L);
filterCounter.put(filter.getClass(), c + 1L);
}
public Map<String,Long> getCountsByFilter() {
final TreeMap<String, Long> sortedCounts = new TreeMap<String, Long>();
for(Map.Entry<Class,Long> counterEntry: counter.entrySet()) {
final TreeMap<String, Long> sortedCounts = new TreeMap<>();
for(Map.Entry<Class,Long> counterEntry: filterCounter.entrySet()) {
sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue());
}
return sortedCounts;
@@ -143,95 +127,4 @@ public class ReadMetrics implements Cloneable {
public void incrementNumReadsSeen() {
nReads++;
}
/**
* Gets the cumulative number of reads skipped in the course of this run.
* @return Cumulative number of reads skipped in the course of this run.
*/
public long getNumSkippedReads() {
return nSkippedReads;
}
/**
* Increments the cumulative number of reads skipped in the course of this run.
*/
public void incrementNumSkippedReads() {
nSkippedReads++;
}
/**
* Gets the number of unmapped reads skipped in the course of this run.
* @return The number of unmapped reads skipped.
*/
public long getNumUnmappedReads() {
return nUnmappedReads;
}
/**
* Increments the number of unmapped reads skipped in the course of this run.
*/
public void incrementNumUnmappedReads() {
nUnmappedReads++;
}
/**
*
* @return
*/
public long getNumNonPrimaryReads() {
return nNotPrimary;
}
/**
*
*/
public void incrementNumNonPrimaryReads() {
nNotPrimary++;
}
/**
*
* @return
*/
public long getNumBadAlignments() {
return nBadAlignments;
}
/**
*
*/
public void incrementNumBadAlignments() {
nBadAlignments++;
}
/**
*
* @return
*/
public long getNumSkippedIndels() {
return nSkippedIndels;
}
/**
*
*/
public void incrementNumSkippedIndels() {
nSkippedIndels++;
}
/**
*
* @return
*/
public long getNumDuplicates() {
return nDuplicates;
}
/**
*
*/
public void incrementNumDuplicates() {
nDuplicates++;
}
}

View File

@@ -176,13 +176,13 @@ public class LocusReferenceView extends ReferenceView {
/**
* Gets the reference context associated with this particular point or extended interval on the genome.
* @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beoynd current bounds, it will be trimmed down.
* @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beyond current bounds, it will be trimmed down.
* @return The base at the position represented by this genomeLoc.
*/
public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) {
//validateLocation( genomeLoc );
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), bounds.getContigIndex(),
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), genomeLoc.getContigIndex(),
getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
int refStart = -1;

View File

@@ -440,9 +440,8 @@ public class SAMDataSource {
* @return Cumulative read metrics.
*/
public ReadMetrics getCumulativeReadMetrics() {
synchronized(readMetrics) {
return readMetrics.clone();
}
// don't return a clone here because the engine uses a pointer to this object
return readMetrics;
}
/**
@@ -450,9 +449,7 @@
* @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics.
*/
public void incorporateReadMetrics(final ReadMetrics readMetrics) {
synchronized(this.readMetrics) {
this.readMetrics.incrementMetrics(readMetrics);
}
this.readMetrics.incrementMetrics(readMetrics);
}
public StingSAMIterator seek(Shard shard) {
@@ -548,7 +545,10 @@
MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap);
return applyDecoratingIterators(shard.getReadMetrics(),
// The readMetrics object being passed in should be that of this dataSource and NOT the shard: the dataSource's
// metrics is intended to keep track of the reads seen (and hence passed to the CountingFilteringIterator when
// we apply the decorators), whereas the shard's metrics is used to keep track of the "records" seen.
return applyDecoratingIterators(readMetrics,
enableVerification,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),

View File

@@ -52,7 +52,6 @@ import javax.management.ObjectName;
import java.io.File;
import java.lang.management.ManagementFactory;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
@@ -368,7 +367,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values())
nSkippedReads += countsByFilter;
logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)",
logger.info(String.format("%d reads were filtered out during the traversal out of approximately %d total reads (%.2f%%)",
nSkippedReads,
cumulativeMetrics.getNumReadsSeen(),
100.0 * MathUtils.ratio(nSkippedReads, cumulativeMetrics.getNumReadsSeen())));

View File

@@ -1,28 +1,28 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
@@ -41,7 +41,8 @@ import java.util.NoSuchElementException;
* @author Mark DePristo
*/
public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
private final ReadMetrics runtimeMetrics;
private final ReadMetrics globalRuntimeMetrics;
private final ReadMetrics privateRuntimeMetrics;
private final Iterator<SAMRecord> iterator;
private final Collection<ReadFilter> filters;
private SAMRecord next = null;
@@ -54,7 +55,8 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
* @param filters the filter (which may be a FilterAggregator)
*/
public CountingFilteringIterator(ReadMetrics metrics, Iterator<SAMRecord> iterator, Collection<ReadFilter> filters) {
this.runtimeMetrics = metrics;
this.globalRuntimeMetrics = metrics;
privateRuntimeMetrics = new ReadMetrics();
this.iterator = iterator;
this.filters = filters;
next = getNextRecord();
@@ -95,6 +97,8 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
public void close() {
CloserUtil.close(iterator);
// update the global metrics with all the data we collected here
globalRuntimeMetrics.incrementMetrics(privateRuntimeMetrics);
}
/**
@@ -105,12 +109,15 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
private SAMRecord getNextRecord() {
while (iterator.hasNext()) {
SAMRecord record = iterator.next();
runtimeMetrics.incrementNumReadsSeen();
// update only the private copy of the metrics so that we don't need to worry about race conditions
// that can arise when trying to update the global copy; it was agreed that this is the cleanest solution.
privateRuntimeMetrics.incrementNumReadsSeen();
boolean filtered = false;
for(SamRecordFilter filter: filters) {
if(filter.filterOut(record)) {
runtimeMetrics.incrementFilter(filter);
privateRuntimeMetrics.incrementFilter(filter);
filtered = true;
break;
}

View File

@@ -100,15 +100,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
// by default there's nothing to do
}
/**
* Update the cumulative traversal metrics according to the data in this shard
*
* @param shard a non-null shard
*/
public void updateCumulativeMetrics(final Shard shard) {
updateCumulativeMetrics(shard.getReadMetrics());
}
/**
* Update the cumulative traversal metrics according to the data in this shard
*

View File

@@ -259,8 +259,6 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce();
final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce);
updateCumulativeMetrics(dataProvider.getShard());
return result;
}

View File

@@ -196,7 +196,6 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
sum = walker.reduce(x, sum);
}
updateCumulativeMetrics(dataProvider.getShard());
printProgress(site.getStopLocation());
done = walker.isDone();
}

View File

@@ -92,7 +92,6 @@ public class TraverseLociNano<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,
final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
sum = result.reduceResult;
dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
updateCumulativeMetrics(dataProvider.getShard());
}
// We have a final map call to execute here to clean up the skipped based from the

View File

@@ -90,7 +90,6 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
pairs.clear();
pairs.add(read);
updateCumulativeMetrics(dataProvider.getShard());
printProgress(null);
}

View File

@@ -101,8 +101,6 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
final Iterator<MapData> aggregatedInputs = aggregateMapData(dataProvider);
final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce);
updateCumulativeMetrics(dataProvider.getShard());
return result;
}

View File

@@ -0,0 +1,321 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.*;
public class ReadMetricsUnitTest extends BaseTest {
@Test
public void testReadsSeenDoNotOverflowInt() {
final ReadMetrics metrics = new ReadMetrics();
final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L;
for ( long i = 0L; i < moreThanMaxInt; i++ ) {
metrics.incrementNumReadsSeen();
}
Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt);
Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE);
logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE));
}
// Test the accuracy of the read metrics
private IndexedFastaSequenceFile reference;
private SAMSequenceDictionary dictionary;
private SAMFileHeader header;
private GATKSAMReadGroupRecord readGroup;
private GenomeLocParser genomeLocParser;
private File testBAM;
private static final int numReadsPerContig = 250000;
private static final List<String> contigs = Arrays.asList("1", "2", "3");
@BeforeClass
private void init() throws IOException {
reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
header.setSequenceDictionary(dictionary);
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test"));
final List<GATKSAMRecord> reads = new ArrayList<>();
for ( final String contig : contigs ) {
for ( int i = 1; i <= numReadsPerContig; i++ ) {
reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i));
}
}
createBAM(reads);
}
private void createBAM(final List<GATKSAMRecord> reads) throws IOException {
testBAM = File.createTempFile("TraverseActiveRegionsUnitTest", ".bam");
testBAM.deleteOnExit();
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, testBAM);
for (GATKSAMRecord read : reads ) {
out.addAlignment(read);
}
out.close();
new File(testBAM.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit();
new File(testBAM.getAbsolutePath() + ".bai").deleteOnExit();
}
// copied from LocusViewTemplate
protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) {
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(readName);
record.setReferenceIndex(dictionary.getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart);
record.setCigarString("1M");
record.setReadString("A");
record.setBaseQualityString("A");
record.setReadGroup(readGroup);
return record;
}
@Test
public void testCountsFromReadTraversal() {
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
final Collection<SAMReaderID> samFiles = new ArrayList<>();
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
samFiles.add(readerID);
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
new ArrayList<ReadTransformer>(),
false, (byte)30, false, true);
engine.setReadsDataSource(dataSource);
final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1);
final DummyReadWalker walker = new DummyReadWalker();
traverseReadsNano.initialize(engine, walker, null);
for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) {
final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList<ReferenceOrderedDataSource>());
traverseReadsNano.traverse(walker, dataProvider, 0);
dataProvider.close();
}
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
}
@Test
public void testCountsFromLocusTraversal() {
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
final Collection<SAMReaderID> samFiles = new ArrayList<>();
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
samFiles.add(readerID);
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
new ArrayList<ReadTransformer>(),
false, (byte)30, false, true);
engine.setReadsDataSource(dataSource);
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
final TraverseLociNano traverseLociNano = new TraverseLociNano(1);
final DummyLocusWalker walker = new DummyLocusWalker();
traverseLociNano.initialize(engine, walker, null);
for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer()) ) {
final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
traverseLociNano.traverse(walker, dataProvider, 0);
dataProvider.close();
}
windowMaker.close();
}
//dataSource.close();
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
}
@Test
public void testCountsFromActiveRegionTraversal() {
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
final Collection<SAMReaderID> samFiles = new ArrayList<>();
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
samFiles.add(readerID);
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
new ArrayList<ReadTransformer>(),
false, (byte)30, false, true);
engine.setReadsDataSource(dataSource);
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
final List<GenomeLoc> intervals = new ArrayList<>(contigs.size());
for ( final String contig : contigs )
intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig));
final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions();
final DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
traverseActiveRegions.initialize(engine, walker, null);
for ( final Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer()) ) {
final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
traverseActiveRegions.traverse(walker, dataProvider, 0);
dataProvider.close();
}
windowMaker.close();
}
traverseActiveRegions.endTraversal(walker, 0);
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
}
class DummyLocusWalker extends LocusWalker<Integer, Integer> {
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return 0;
}
}
class DummyReadWalker extends ReadWalker<Integer, Integer> {
@Override
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return 0;
}
}
class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
@Override
public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return new ActivityProfileState(ref.getLocus(), 0.0);
}
@Override
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return 0;
}
}
}

View File

@@ -25,10 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
public class CountReadsUnitTest {
public class CountReadsUnitTest extends BaseTest {
@Test
public void testReadsDoNotOverflowInt() {
@@ -45,5 +46,6 @@ public class CountReadsUnitTest {
}
Assert.assertEquals(sum.longValue(), moreThanMaxInt);
Assert.assertTrue(sum.longValue() > (long) Integer.MAX_VALUE);
}
}