Merge pull request #235 from broadinstitute/eb_several_traversal_printout_fixes
Eb several traversal printout fixes
This commit is contained in:
commit
3cfe2dcc64
|
|
@ -519,7 +519,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
generatePlots();
|
||||
}
|
||||
|
||||
logger.info("Processed: " + result + " reads");
|
||||
logger.info("BaseRecalibrator was able to recalibrate " + result + " reads");
|
||||
}
|
||||
|
||||
private RecalibrationTables getRecalibrationTable() {
|
||||
|
|
|
|||
|
|
@ -40,37 +40,27 @@ public class ReadMetrics implements Cloneable {
|
|||
private long nRecords;
|
||||
// How many reads have we processed, along with those skipped for various reasons
|
||||
private long nReads;
|
||||
private long nSkippedReads;
|
||||
private long nUnmappedReads;
|
||||
private long nNotPrimary;
|
||||
private long nBadAlignments;
|
||||
private long nSkippedIndels;
|
||||
private long nDuplicates;
|
||||
private Map<Class, Long> counter = new HashMap<Class, Long>();
|
||||
|
||||
// keep track of filtered records by filter type (class)
|
||||
private Map<Class, Long> filterCounter = new HashMap<>();
|
||||
|
||||
/**
|
||||
* Combines these metrics with a set of other metrics, storing the results in this class.
|
||||
* @param metrics The metrics to fold into this class.
|
||||
*/
|
||||
public void incrementMetrics(ReadMetrics metrics) {
|
||||
public synchronized void incrementMetrics(ReadMetrics metrics) {
|
||||
nRecords += metrics.nRecords;
|
||||
nReads += metrics.nReads;
|
||||
nSkippedReads += metrics.nSkippedReads;
|
||||
nUnmappedReads += metrics.nUnmappedReads;
|
||||
nNotPrimary += metrics.nNotPrimary;
|
||||
nBadAlignments += metrics.nBadAlignments;
|
||||
nSkippedIndels += metrics.nSkippedIndels;
|
||||
nDuplicates += metrics.nDuplicates;
|
||||
for(Map.Entry<Class,Long> counterEntry: metrics.counter.entrySet()) {
|
||||
for(Map.Entry<Class,Long> counterEntry: metrics.filterCounter.entrySet()) {
|
||||
Class counterType = counterEntry.getKey();
|
||||
long newValue = (counter.containsKey(counterType) ? counter.get(counterType) : 0) + counterEntry.getValue();
|
||||
counter.put(counterType,newValue);
|
||||
long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue();
|
||||
filterCounter.put(counterType, newValue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a copy of the given read metrics.
|
||||
* @return
|
||||
* @return a non-null clone
|
||||
*/
|
||||
public ReadMetrics clone() {
|
||||
ReadMetrics newMetrics;
|
||||
|
|
@ -82,13 +72,7 @@ public class ReadMetrics implements Cloneable {
|
|||
}
|
||||
newMetrics.nRecords = nRecords;
|
||||
newMetrics.nReads = nReads;
|
||||
newMetrics.nSkippedReads = nSkippedReads;
|
||||
newMetrics.nUnmappedReads = nUnmappedReads;
|
||||
newMetrics.nNotPrimary = nNotPrimary;
|
||||
newMetrics.nBadAlignments = nBadAlignments;
|
||||
newMetrics.nSkippedIndels = nSkippedIndels;
|
||||
newMetrics.nDuplicates = nDuplicates;
|
||||
newMetrics.counter = new HashMap<Class,Long>(counter);
|
||||
newMetrics.filterCounter = new HashMap<>(filterCounter);
|
||||
|
||||
return newMetrics;
|
||||
}
|
||||
|
|
@ -96,16 +80,16 @@ public class ReadMetrics implements Cloneable {
|
|||
|
||||
public void incrementFilter(SamRecordFilter filter) {
|
||||
long c = 0;
|
||||
if ( counter.containsKey(filter.getClass()) ) {
|
||||
c = counter.get(filter.getClass());
|
||||
if ( filterCounter.containsKey(filter.getClass()) ) {
|
||||
c = filterCounter.get(filter.getClass());
|
||||
}
|
||||
|
||||
counter.put(filter.getClass(), c + 1L);
|
||||
filterCounter.put(filter.getClass(), c + 1L);
|
||||
}
|
||||
|
||||
public Map<String,Long> getCountsByFilter() {
|
||||
final TreeMap<String, Long> sortedCounts = new TreeMap<String, Long>();
|
||||
for(Map.Entry<Class,Long> counterEntry: counter.entrySet()) {
|
||||
final TreeMap<String, Long> sortedCounts = new TreeMap<>();
|
||||
for(Map.Entry<Class,Long> counterEntry: filterCounter.entrySet()) {
|
||||
sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue());
|
||||
}
|
||||
return sortedCounts;
|
||||
|
|
@ -143,95 +127,4 @@ public class ReadMetrics implements Cloneable {
|
|||
public void incrementNumReadsSeen() {
|
||||
nReads++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the cumulative number of reads skipped in the course of this run.
|
||||
* @return Cumulative number of reads skipped in the course of this run.
|
||||
*/
|
||||
public long getNumSkippedReads() {
|
||||
return nSkippedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the cumulative number of reads skipped in the course of this run.
|
||||
*/
|
||||
public void incrementNumSkippedReads() {
|
||||
nSkippedReads++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the number of unmapped reads skipped in the course of this run.
|
||||
* @return The number of unmapped reads skipped.
|
||||
*/
|
||||
public long getNumUnmappedReads() {
|
||||
return nUnmappedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the number of unmapped reads skipped in the course of this run.
|
||||
*/
|
||||
public void incrementNumUnmappedReads() {
|
||||
nUnmappedReads++;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public long getNumNonPrimaryReads() {
|
||||
return nNotPrimary;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void incrementNumNonPrimaryReads() {
|
||||
nNotPrimary++;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public long getNumBadAlignments() {
|
||||
return nBadAlignments;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void incrementNumBadAlignments() {
|
||||
nBadAlignments++;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public long getNumSkippedIndels() {
|
||||
return nSkippedIndels;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void incrementNumSkippedIndels() {
|
||||
nSkippedIndels++;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public long getNumDuplicates() {
|
||||
return nDuplicates;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void incrementNumDuplicates() {
|
||||
nDuplicates++;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -176,13 +176,13 @@ public class LocusReferenceView extends ReferenceView {
|
|||
|
||||
/**
|
||||
* Gets the reference context associated with this particular point or extended interval on the genome.
|
||||
* @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beoynd current bounds, it will be trimmed down.
|
||||
* @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beyond current bounds, it will be trimmed down.
|
||||
* @return The base at the position represented by this genomeLoc.
|
||||
*/
|
||||
public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) {
|
||||
//validateLocation( genomeLoc );
|
||||
|
||||
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), bounds.getContigIndex(),
|
||||
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), genomeLoc.getContigIndex(),
|
||||
getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
|
||||
|
||||
int refStart = -1;
|
||||
|
|
|
|||
|
|
@ -440,9 +440,8 @@ public class SAMDataSource {
|
|||
* @return Cumulative read metrics.
|
||||
*/
|
||||
public ReadMetrics getCumulativeReadMetrics() {
|
||||
synchronized(readMetrics) {
|
||||
return readMetrics.clone();
|
||||
}
|
||||
// don't return a clone here because the engine uses a pointer to this object
|
||||
return readMetrics;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -450,9 +449,7 @@ public class SAMDataSource {
|
|||
* @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics.
|
||||
*/
|
||||
public void incorporateReadMetrics(final ReadMetrics readMetrics) {
|
||||
synchronized(this.readMetrics) {
|
||||
this.readMetrics.incrementMetrics(readMetrics);
|
||||
}
|
||||
this.readMetrics.incrementMetrics(readMetrics);
|
||||
}
|
||||
|
||||
public StingSAMIterator seek(Shard shard) {
|
||||
|
|
@ -548,7 +545,10 @@ public class SAMDataSource {
|
|||
|
||||
MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap);
|
||||
|
||||
return applyDecoratingIterators(shard.getReadMetrics(),
|
||||
// The readMetrics object being passed in should be that of this dataSource and NOT the shard: the dataSource's
|
||||
// metrics is intended to keep track of the reads seen (and hence passed to the CountingFilteringIterator when
|
||||
// we apply the decorators), whereas the shard's metrics is used to keep track the "records" seen.
|
||||
return applyDecoratingIterators(readMetrics,
|
||||
enableVerification,
|
||||
readProperties.useOriginalBaseQualities(),
|
||||
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
|
||||
|
|
|
|||
|
|
@ -52,7 +52,6 @@ import javax.management.ObjectName;
|
|||
import java.io.File;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -368,7 +367,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values())
|
||||
nSkippedReads += countsByFilter;
|
||||
|
||||
logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)",
|
||||
logger.info(String.format("%d reads were filtered out during the traversal out of approximately %d total reads (%.2f%%)",
|
||||
nSkippedReads,
|
||||
cumulativeMetrics.getNumReadsSeen(),
|
||||
100.0 * MathUtils.ratio(nSkippedReads, cumulativeMetrics.getNumReadsSeen())));
|
||||
|
|
|
|||
|
|
@ -1,28 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
|
@ -41,7 +41,8 @@ import java.util.NoSuchElementException;
|
|||
* @author Mark DePristo
|
||||
*/
|
||||
public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
|
||||
private final ReadMetrics runtimeMetrics;
|
||||
private final ReadMetrics globalRuntimeMetrics;
|
||||
private final ReadMetrics privateRuntimeMetrics;
|
||||
private final Iterator<SAMRecord> iterator;
|
||||
private final Collection<ReadFilter> filters;
|
||||
private SAMRecord next = null;
|
||||
|
|
@ -54,7 +55,8 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
|
|||
* @param filters the filter (which may be a FilterAggregator)
|
||||
*/
|
||||
public CountingFilteringIterator(ReadMetrics metrics, Iterator<SAMRecord> iterator, Collection<ReadFilter> filters) {
|
||||
this.runtimeMetrics = metrics;
|
||||
this.globalRuntimeMetrics = metrics;
|
||||
privateRuntimeMetrics = new ReadMetrics();
|
||||
this.iterator = iterator;
|
||||
this.filters = filters;
|
||||
next = getNextRecord();
|
||||
|
|
@ -95,6 +97,8 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
|
|||
|
||||
public void close() {
|
||||
CloserUtil.close(iterator);
|
||||
// update the global metrics with all the data we collected here
|
||||
globalRuntimeMetrics.incrementMetrics(privateRuntimeMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -105,12 +109,15 @@ public class CountingFilteringIterator implements CloseableIterator<SAMRecord> {
|
|||
private SAMRecord getNextRecord() {
|
||||
while (iterator.hasNext()) {
|
||||
SAMRecord record = iterator.next();
|
||||
runtimeMetrics.incrementNumReadsSeen();
|
||||
|
||||
// update only the private copy of the metrics so that we don't need to worry about race conditions
|
||||
// that can arise when trying to update the global copy; it was agreed that this is the cleanest solution.
|
||||
privateRuntimeMetrics.incrementNumReadsSeen();
|
||||
|
||||
boolean filtered = false;
|
||||
for(SamRecordFilter filter: filters) {
|
||||
if(filter.filterOut(record)) {
|
||||
runtimeMetrics.incrementFilter(filter);
|
||||
privateRuntimeMetrics.incrementFilter(filter);
|
||||
filtered = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,15 +100,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
// by default there's nothing to do
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the cumulative traversal metrics according to the data in this shard
|
||||
*
|
||||
* @param shard a non-null shard
|
||||
*/
|
||||
public void updateCumulativeMetrics(final Shard shard) {
|
||||
updateCumulativeMetrics(shard.getReadMetrics());
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the cumulative traversal metrics according to the data in this shard
|
||||
*
|
||||
|
|
|
|||
|
|
@ -259,8 +259,6 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce();
|
||||
final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce);
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -196,7 +196,6 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
printProgress(site.getStopLocation());
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -92,7 +92,6 @@ public class TraverseLociNano<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,
|
|||
final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
|
||||
sum = result.reduceResult;
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
}
|
||||
|
||||
// We have a final map call to execute here to clean up the skipped based from the
|
||||
|
|
|
|||
|
|
@ -90,7 +90,6 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
|
|||
pairs.clear();
|
||||
pairs.add(read);
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
printProgress(null);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -101,8 +101,6 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
|||
final Iterator<MapData> aggregatedInputs = aggregateMapData(dataProvider);
|
||||
final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce);
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -66,11 +66,16 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
*/
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountReads extends ReadWalker<Integer, Integer> implements NanoSchedulable {
|
||||
public class CountReads extends ReadWalker<Integer, Long> implements NanoSchedulable {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override public Integer reduceInit() { return 0; }
|
||||
@Override public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
@Override public Long reduceInit() { return 0L; }
|
||||
|
||||
public Long reduce(Integer value, Long sum) { return (long) value + sum; }
|
||||
|
||||
public void onTraversalDone(Long result) {
|
||||
logger.info("CountReads counted " + result + " reads in the traversal");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,321 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.traversals.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class ReadMetricsUnitTest extends BaseTest {
|
||||
|
||||
@Test
|
||||
public void testReadsSeenDoNotOverflowInt() {
|
||||
|
||||
final ReadMetrics metrics = new ReadMetrics();
|
||||
|
||||
final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L;
|
||||
|
||||
for ( long i = 0L; i < moreThanMaxInt; i++ ) {
|
||||
metrics.incrementNumReadsSeen();
|
||||
}
|
||||
|
||||
Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt);
|
||||
Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE);
|
||||
|
||||
logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE));
|
||||
}
|
||||
|
||||
|
||||
// Test the accuracy of the read metrics
|
||||
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private SAMFileHeader header;
|
||||
private GATKSAMReadGroupRecord readGroup;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private File testBAM;
|
||||
|
||||
private static final int numReadsPerContig = 250000;
|
||||
private static final List<String> contigs = Arrays.asList("1", "2", "3");
|
||||
|
||||
@BeforeClass
|
||||
private void init() throws IOException {
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
|
||||
header.setSequenceDictionary(dictionary);
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test"));
|
||||
|
||||
final List<GATKSAMRecord> reads = new ArrayList<>();
|
||||
for ( final String contig : contigs ) {
|
||||
for ( int i = 1; i <= numReadsPerContig; i++ ) {
|
||||
reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i));
|
||||
}
|
||||
}
|
||||
|
||||
createBAM(reads);
|
||||
}
|
||||
|
||||
private void createBAM(final List<GATKSAMRecord> reads) throws IOException {
|
||||
testBAM = File.createTempFile("TraverseActiveRegionsUnitTest", ".bam");
|
||||
testBAM.deleteOnExit();
|
||||
|
||||
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, testBAM);
|
||||
for (GATKSAMRecord read : reads ) {
|
||||
out.addAlignment(read);
|
||||
}
|
||||
out.close();
|
||||
|
||||
new File(testBAM.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit();
|
||||
new File(testBAM.getAbsolutePath() + ".bai").deleteOnExit();
|
||||
}
|
||||
|
||||
// copied from LocusViewTemplate
|
||||
protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) {
|
||||
GATKSAMRecord record = new GATKSAMRecord(header);
|
||||
|
||||
record.setReadName(readName);
|
||||
record.setReferenceIndex(dictionary.getSequenceIndex(contig));
|
||||
record.setAlignmentStart(alignmentStart);
|
||||
|
||||
record.setCigarString("1M");
|
||||
record.setReadString("A");
|
||||
record.setBaseQualityString("A");
|
||||
record.setReadGroup(readGroup);
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountsFromReadTraversal() {
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
|
||||
final Collection<SAMReaderID> samFiles = new ArrayList<>();
|
||||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
|
||||
final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1);
|
||||
final DummyReadWalker walker = new DummyReadWalker();
|
||||
traverseReadsNano.initialize(engine, walker, null);
|
||||
|
||||
for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) {
|
||||
final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList<ReferenceOrderedDataSource>());
|
||||
traverseReadsNano.traverse(walker, dataProvider, 0);
|
||||
dataProvider.close();
|
||||
}
|
||||
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountsFromLocusTraversal() {
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
|
||||
final Collection<SAMReaderID> samFiles = new ArrayList<>();
|
||||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
|
||||
|
||||
final TraverseLociNano traverseLociNano = new TraverseLociNano(1);
|
||||
final DummyLocusWalker walker = new DummyLocusWalker();
|
||||
traverseLociNano.initialize(engine, walker, null);
|
||||
|
||||
for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer()) ) {
|
||||
final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
|
||||
for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
|
||||
final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
|
||||
traverseLociNano.traverse(walker, dataProvider, 0);
|
||||
dataProvider.close();
|
||||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
|
||||
//dataSource.close();
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountsFromActiveRegionTraversal() {
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
|
||||
final Collection<SAMReaderID> samFiles = new ArrayList<>();
|
||||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
|
||||
|
||||
final List<GenomeLoc> intervals = new ArrayList<>(contigs.size());
|
||||
for ( final String contig : contigs )
|
||||
intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig));
|
||||
|
||||
final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions();
|
||||
final DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
traverseActiveRegions.initialize(engine, walker, null);
|
||||
|
||||
for ( final Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer()) ) {
|
||||
final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
|
||||
for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
|
||||
final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
|
||||
traverseActiveRegions.traverse(walker, dataProvider, 0);
|
||||
dataProvider.close();
|
||||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
traverseActiveRegions.endTraversal(walker, 0);
|
||||
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
|
||||
}
|
||||
|
||||
class DummyLocusWalker extends LocusWalker<Integer, Integer> {
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
class DummyReadWalker extends ReadWalker<Integer, Integer> {
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
@Override
|
||||
public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return new ActivityProfileState(ref.getLocus(), 0.0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -153,11 +153,11 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
|
||||
countReadWalker.onTraversalDone(accumulator);
|
||||
|
||||
if (!(accumulator instanceof Integer)) {
|
||||
fail("Count read walker should return an interger.");
|
||||
if (!(accumulator instanceof Long)) {
|
||||
fail("Count read walker should return a Long.");
|
||||
}
|
||||
if (((Integer) accumulator) != 10000) {
|
||||
fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator));
|
||||
if (!accumulator.equals(new Long(10000))) {
|
||||
fail("there should be 10000 mapped reads in the index file, there was " + (accumulator));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class CountReadsUnitTest extends BaseTest {
|
||||
|
||||
@Test
|
||||
public void testReadsDoNotOverflowInt() {
|
||||
|
||||
final CountReads walker = new CountReads();
|
||||
|
||||
final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L;
|
||||
|
||||
Long sum = walker.reduceInit();
|
||||
|
||||
for ( long i = 0L; i < moreThanMaxInt; i++ ) {
|
||||
final Integer x = walker.map(null, null, null);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
Assert.assertEquals(sum.longValue(), moreThanMaxInt);
|
||||
Assert.assertTrue(sum.longValue() > (long) Integer.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue