Merge pull request #235 from broadinstitute/eb_several_traversal_printout_fixes

Eb several traversal printout fixes
This commit is contained in:
Mark DePristo 2013-05-21 13:13:11 -07:00
commit 3cfe2dcc64
16 changed files with 445 additions and 185 deletions

View File

@ -519,7 +519,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
generatePlots();
}
logger.info("Processed: " + result + " reads");
logger.info("BaseRecalibrator was able to recalibrate " + result + " reads");
}
private RecalibrationTables getRecalibrationTable() {

View File

@ -40,37 +40,27 @@ public class ReadMetrics implements Cloneable {
private long nRecords;
// How many reads have we processed, along with those skipped for various reasons
private long nReads;
private long nSkippedReads;
private long nUnmappedReads;
private long nNotPrimary;
private long nBadAlignments;
private long nSkippedIndels;
private long nDuplicates;
private Map<Class, Long> counter = new HashMap<Class, Long>();
// keep track of filtered records by filter type (class)
private Map<Class, Long> filterCounter = new HashMap<>();
/**
* Combines these metrics with a set of other metrics, storing the results in this class.
* @param metrics The metrics to fold into this class.
*/
public void incrementMetrics(ReadMetrics metrics) {
public synchronized void incrementMetrics(ReadMetrics metrics) {
nRecords += metrics.nRecords;
nReads += metrics.nReads;
nSkippedReads += metrics.nSkippedReads;
nUnmappedReads += metrics.nUnmappedReads;
nNotPrimary += metrics.nNotPrimary;
nBadAlignments += metrics.nBadAlignments;
nSkippedIndels += metrics.nSkippedIndels;
nDuplicates += metrics.nDuplicates;
for(Map.Entry<Class,Long> counterEntry: metrics.counter.entrySet()) {
for(Map.Entry<Class,Long> counterEntry: metrics.filterCounter.entrySet()) {
Class counterType = counterEntry.getKey();
long newValue = (counter.containsKey(counterType) ? counter.get(counterType) : 0) + counterEntry.getValue();
counter.put(counterType,newValue);
long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue();
filterCounter.put(counterType, newValue);
}
}
/**
* Create a copy of the given read metrics.
* @return
* @return a non-null clone
*/
public ReadMetrics clone() {
ReadMetrics newMetrics;
@ -82,13 +72,7 @@ public class ReadMetrics implements Cloneable {
}
newMetrics.nRecords = nRecords;
newMetrics.nReads = nReads;
newMetrics.nSkippedReads = nSkippedReads;
newMetrics.nUnmappedReads = nUnmappedReads;
newMetrics.nNotPrimary = nNotPrimary;
newMetrics.nBadAlignments = nBadAlignments;
newMetrics.nSkippedIndels = nSkippedIndels;
newMetrics.nDuplicates = nDuplicates;
newMetrics.counter = new HashMap<Class,Long>(counter);
newMetrics.filterCounter = new HashMap<>(filterCounter);
return newMetrics;
}
@ -96,16 +80,16 @@ public class ReadMetrics implements Cloneable {
public void incrementFilter(SamRecordFilter filter) {
long c = 0;
if ( counter.containsKey(filter.getClass()) ) {
c = counter.get(filter.getClass());
if ( filterCounter.containsKey(filter.getClass()) ) {
c = filterCounter.get(filter.getClass());
}
counter.put(filter.getClass(), c + 1L);
filterCounter.put(filter.getClass(), c + 1L);
}
public Map<String,Long> getCountsByFilter() {
final TreeMap<String, Long> sortedCounts = new TreeMap<String, Long>();
for(Map.Entry<Class,Long> counterEntry: counter.entrySet()) {
final TreeMap<String, Long> sortedCounts = new TreeMap<>();
for(Map.Entry<Class,Long> counterEntry: filterCounter.entrySet()) {
sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue());
}
return sortedCounts;
@ -143,95 +127,4 @@ public class ReadMetrics implements Cloneable {
public void incrementNumReadsSeen() {
nReads++;
}
/**
* Gets the cumulative number of reads skipped in the course of this run.
* @return Cumulative number of reads skipped in the course of this run.
*/
public long getNumSkippedReads() {
return nSkippedReads;
}
/**
* Increments the cumulative number of reads skipped in the course of this run.
*/
public void incrementNumSkippedReads() {
nSkippedReads++;
}
/**
* Gets the number of unmapped reads skipped in the course of this run.
* @return The number of unmapped reads skipped.
*/
public long getNumUnmappedReads() {
return nUnmappedReads;
}
/**
* Increments the number of unmapped reads skipped in the course of this run.
*/
public void incrementNumUnmappedReads() {
nUnmappedReads++;
}
/**
*
* @return
*/
public long getNumNonPrimaryReads() {
return nNotPrimary;
}
/**
*
*/
public void incrementNumNonPrimaryReads() {
nNotPrimary++;
}
/**
*
* @return
*/
public long getNumBadAlignments() {
return nBadAlignments;
}
/**
*
*/
public void incrementNumBadAlignments() {
nBadAlignments++;
}
/**
*
* @return
*/
public long getNumSkippedIndels() {
return nSkippedIndels;
}
/**
*
*/
public void incrementNumSkippedIndels() {
nSkippedIndels++;
}
/**
*
* @return
*/
public long getNumDuplicates() {
return nDuplicates;
}
/**
*
*/
public void incrementNumDuplicates() {
nDuplicates++;
}
}

View File

@ -176,13 +176,13 @@ public class LocusReferenceView extends ReferenceView {
/**
* Gets the reference context associated with this particular point or extended interval on the genome.
* @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beoynd current bounds, it will be trimmed down.
* @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beyond current bounds, it will be trimmed down.
* @return The base at the position represented by this genomeLoc.
*/
public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) {
//validateLocation( genomeLoc );
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), bounds.getContigIndex(),
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), genomeLoc.getContigIndex(),
getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
int refStart = -1;

View File

@ -440,9 +440,8 @@ public class SAMDataSource {
* @return Cumulative read metrics.
*/
public ReadMetrics getCumulativeReadMetrics() {
synchronized(readMetrics) {
return readMetrics.clone();
}
// don't return a clone here because the engine uses a pointer to this object
return readMetrics;
}
/**
@ -450,9 +449,7 @@ public class SAMDataSource {
* @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics.
*/
public void incorporateReadMetrics(final ReadMetrics readMetrics) {
synchronized(this.readMetrics) {
this.readMetrics.incrementMetrics(readMetrics);
}
this.readMetrics.incrementMetrics(readMetrics);
}
public StingSAMIterator seek(Shard shard) {
@ -548,7 +545,10 @@ public class SAMDataSource {
MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap);
return applyDecoratingIterators(shard.getReadMetrics(),
// The readMetrics object being passed in should be that of this dataSource and NOT the shard: the dataSource's
// metrics is intended to keep track of the reads seen (and hence passed to the CountingFilteringIterator when
// we apply the decorators), whereas the shard's metrics is used to keep track the "records" seen.
return applyDecoratingIterators(readMetrics,
enableVerification,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),

View File

@ -52,7 +52,6 @@ import javax.management.ObjectName;
import java.io.File;
import java.lang.management.ManagementFactory;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
@ -368,7 +367,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values())
nSkippedReads += countsByFilter;
logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)",
logger.info(String.format("%d reads were filtered out during the traversal out of approximately %d total reads (%.2f%%)",
nSkippedReads,
cumulativeMetrics.getNumReadsSeen(),
100.0 * MathUtils.ratio(nSkippedReads, cumulativeMetrics.getNumReadsSeen())));

View File

@ -1,28 +1,28 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
@ -41,7 +41,8 @@ import java.util.NoSuchElementException;
* @author Mark DePristo
*/
public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
private final ReadMetrics runtimeMetrics;
private final ReadMetrics globalRuntimeMetrics;
private final ReadMetrics privateRuntimeMetrics;
private final Iterator<SAMRecord> iterator;
private final Collection<ReadFilter> filters;
private SAMRecord next = null;
@ -54,7 +55,8 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
* @param filters the filter (which may be a FilterAggregator)
*/
public CountingFilteringIterator(ReadMetrics metrics, Iterator<SAMRecord> iterator, Collection<ReadFilter> filters) {
this.runtimeMetrics = metrics;
this.globalRuntimeMetrics = metrics;
privateRuntimeMetrics = new ReadMetrics();
this.iterator = iterator;
this.filters = filters;
next = getNextRecord();
@ -95,6 +97,8 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
public void close() {
CloserUtil.close(iterator);
// update the global metrics with all the data we collected here
globalRuntimeMetrics.incrementMetrics(privateRuntimeMetrics);
}
/**
@ -105,12 +109,15 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
private SAMRecord getNextRecord() {
while (iterator.hasNext()) {
SAMRecord record = iterator.next();
runtimeMetrics.incrementNumReadsSeen();
// update only the private copy of the metrics so that we don't need to worry about race conditions
// that can arise when trying to update the global copy; it was agreed that this is the cleanest solution.
privateRuntimeMetrics.incrementNumReadsSeen();
boolean filtered = false;
for(SamRecordFilter filter: filters) {
if(filter.filterOut(record)) {
runtimeMetrics.incrementFilter(filter);
privateRuntimeMetrics.incrementFilter(filter);
filtered = true;
break;
}

View File

@ -100,15 +100,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
// by default there's nothing to do
}
/**
* Update the cumulative traversal metrics according to the data in this shard
*
* @param shard a non-null shard
*/
public void updateCumulativeMetrics(final Shard shard) {
updateCumulativeMetrics(shard.getReadMetrics());
}
/**
* Update the cumulative traversal metrics according to the data in this shard
*

View File

@ -259,8 +259,6 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce();
final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce);
updateCumulativeMetrics(dataProvider.getShard());
return result;
}

View File

@ -196,7 +196,6 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
sum = walker.reduce(x, sum);
}
updateCumulativeMetrics(dataProvider.getShard());
printProgress(site.getStopLocation());
done = walker.isDone();
}

View File

@ -92,7 +92,6 @@ public class TraverseLociNano<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,
final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
sum = result.reduceResult;
dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
updateCumulativeMetrics(dataProvider.getShard());
}
// We have a final map call to execute here to clean up the skipped based from the

View File

@ -90,7 +90,6 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
pairs.clear();
pairs.add(read);
updateCumulativeMetrics(dataProvider.getShard());
printProgress(null);
}

View File

@ -101,8 +101,6 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
final Iterator<MapData> aggregatedInputs = aggregateMapData(dataProvider);
final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce);
updateCumulativeMetrics(dataProvider.getShard());
return result;
}

View File

@ -66,11 +66,16 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReads extends ReadWalker<Integer, Integer> implements NanoSchedulable {
public class CountReads extends ReadWalker<Integer, Long> implements NanoSchedulable {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
return 1;
}
@Override public Integer reduceInit() { return 0; }
@Override public Integer reduce(Integer value, Integer sum) { return value + sum; }
@Override public Long reduceInit() { return 0L; }
public Long reduce(Integer value, Long sum) { return (long) value + sum; }
public void onTraversalDone(Long result) {
logger.info("CountReads counted " + result + " reads in the traversal");
}
}

View File

@ -0,0 +1,321 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
 * Unit tests for ReadMetrics bookkeeping: verifies the read counter does not overflow
 * a 32-bit int, and that read/iteration counts accumulated by the read, locus, and
 * active-region traversals match the number of reads actually written to a test BAM.
 */
public class ReadMetricsUnitTest extends BaseTest {
@Test
public void testReadsSeenDoNotOverflowInt() {
final ReadMetrics metrics = new ReadMetrics();
// One more than Integer.MAX_VALUE: incrementing this many times would wrap a 32-bit counter.
final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L;
for ( long i = 0L; i < moreThanMaxInt; i++ ) {
metrics.incrementNumReadsSeen();
}
// Both checks fail if ReadMetrics stored the count in an int instead of a long.
Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt);
Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE);
logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE));
}
// Test the accuracy of the read metrics
// Fixture state shared by the traversal-count tests below; populated once in init().
private IndexedFastaSequenceFile reference;
private SAMSequenceDictionary dictionary;
private SAMFileHeader header;
private GATKSAMReadGroupRecord readGroup;
private GenomeLocParser genomeLocParser;
// Temporary indexed BAM holding numReadsPerContig reads on each contig in 'contigs'.
private File testBAM;
private static final int numReadsPerContig = 250000;
private static final List<String> contigs = Arrays.asList("1", "2", "3");
/**
 * Builds the shared fixture: loads the b37 reference, creates a coordinate-sorted
 * header with a "test" read group, generates one 1M read per position per contig,
 * and writes them to an indexed temporary BAM.
 */
@BeforeClass
private void init() throws IOException {
reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
header.setSequenceDictionary(dictionary);
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test"));
final List<GATKSAMRecord> reads = new ArrayList<>();
for ( final String contig : contigs ) {
// Reads are placed at consecutive alignment starts 1..numReadsPerContig so they stay coordinate-sorted.
for ( int i = 1; i <= numReadsPerContig; i++ ) {
reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i));
}
}
createBAM(reads);
}
/**
 * Writes the given reads to a temporary indexed BAM and registers the BAM and
 * both possible index filenames (.bai next to and appended to the BAM name)
 * for deletion on JVM exit.
 */
private void createBAM(final List<GATKSAMRecord> reads) throws IOException {
testBAM = File.createTempFile("TraverseActiveRegionsUnitTest", ".bam");
testBAM.deleteOnExit();
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, testBAM);
for (GATKSAMRecord read : reads ) {
out.addAlignment(read);
}
out.close();
// The index may be written under either naming convention depending on the factory; clean up both.
new File(testBAM.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit();
new File(testBAM.getAbsolutePath() + ".bai").deleteOnExit();
}
// copied from LocusViewTemplate
/**
 * Builds a minimal single-base (1M cigar) aligned read at the given contig/start,
 * tagged with the fixture's read group.
 */
protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) {
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(readName);
record.setReferenceIndex(dictionary.getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart);
record.setCigarString("1M");
record.setReadString("A");
record.setBaseQualityString("A");
record.setReadGroup(readGroup);
return record;
}
/**
 * Runs a read traversal over every shard of the test BAM and asserts the engine's
 * cumulative metrics saw exactly contigs * numReadsPerContig reads and iterations.
 */
@Test
public void testCountsFromReadTraversal() {
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
final Collection<SAMReaderID> samFiles = new ArrayList<>();
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
samFiles.add(readerID);
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
new ArrayList<ReadTransformer>(),
false, (byte)30, false, true);
engine.setReadsDataSource(dataSource);
final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1);
final DummyReadWalker walker = new DummyReadWalker();
traverseReadsNano.initialize(engine, walker, null);
for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) {
final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList<ReferenceOrderedDataSource>());
traverseReadsNano.traverse(walker, dataProvider, 0);
dataProvider.close();
}
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
}
/**
 * Same check as the read traversal, but driving a locus traversal through
 * WindowMaker-produced locus windows over every shard.
 */
@Test
public void testCountsFromLocusTraversal() {
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
final Collection<SAMReaderID> samFiles = new ArrayList<>();
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
samFiles.add(readerID);
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
new ArrayList<ReadTransformer>(),
false, (byte)30, false, true);
engine.setReadsDataSource(dataSource);
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
final TraverseLociNano traverseLociNano = new TraverseLociNano(1);
final DummyLocusWalker walker = new DummyLocusWalker();
traverseLociNano.initialize(engine, walker, null);
for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer()) ) {
final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
traverseLociNano.traverse(walker, dataProvider, 0);
dataProvider.close();
}
windowMaker.close();
}
//dataSource.close();
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
}
/**
 * Same check again for the active-region traversal, sharding over explicit
 * whole-contig intervals and finishing with endTraversal() before asserting.
 */
@Test
public void testCountsFromActiveRegionTraversal() {
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
final Collection<SAMReaderID> samFiles = new ArrayList<>();
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
samFiles.add(readerID);
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
new ArrayList<ReadTransformer>(),
false, (byte)30, false, true);
engine.setReadsDataSource(dataSource);
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
// One interval per contig spanning exactly the positions where reads were generated.
final List<GenomeLoc> intervals = new ArrayList<>(contigs.size());
for ( final String contig : contigs )
intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig));
final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions();
final DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
traverseActiveRegions.initialize(engine, walker, null);
for ( final Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer()) ) {
final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
traverseActiveRegions.traverse(walker, dataProvider, 0);
dataProvider.close();
}
windowMaker.close();
}
// Active-region traversal buffers work until the end; flush it before checking counts.
traverseActiveRegions.endTraversal(walker, 0);
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
}
// No-op locus walker: traversal drives the metrics; map/reduce results are ignored.
class DummyLocusWalker extends LocusWalker<Integer, Integer> {
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return 0;
}
}
// No-op read walker used by testCountsFromReadTraversal.
class DummyReadWalker extends ReadWalker<Integer, Integer> {
@Override
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return 0;
}
}
// Active-region walker that reports every locus as inactive (probability 0.0) and does no work.
class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
@Override
public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return new ActivityProfileState(ref.getLocus(), 0.0);
}
@Override
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return 0;
}
}
}

View File

@ -153,11 +153,11 @@ public class TraverseReadsUnitTest extends BaseTest {
countReadWalker.onTraversalDone(accumulator);
if (!(accumulator instanceof Integer)) {
fail("Count read walker should return an interger.");
if (!(accumulator instanceof Long)) {
fail("Count read walker should return a Long.");
}
if (((Integer) accumulator) != 10000) {
fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator));
if (!accumulator.equals(new Long(10000))) {
fail("there should be 10000 mapped reads in the index file, there was " + (accumulator));
}
}

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
public class CountReadsUnitTest extends BaseTest {
@Test
public void testReadsDoNotOverflowInt() {
final CountReads walker = new CountReads();
final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L;
Long sum = walker.reduceInit();
for ( long i = 0L; i < moreThanMaxInt; i++ ) {
final Integer x = walker.map(null, null, null);
sum = walker.reduce(x, sum);
}
Assert.assertEquals(sum.longValue(), moreThanMaxInt);
Assert.assertTrue(sum.longValue() > (long) Integer.MAX_VALUE);
}
}