Resolving merge conflicts after Mark's latest push
This commit is contained in:
commit
bce6fce58d
|
|
@ -1,99 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
|
||||
private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class);
|
||||
|
||||
final List<NestedIntegerArray<RecalDatum>> allThreadLocalQualityScoreTables = new LinkedList<NestedIntegerArray<RecalDatum>>();
|
||||
private ThreadLocal<NestedIntegerArray<RecalDatum>> threadLocalQualityScoreTables = new ThreadLocal<NestedIntegerArray<RecalDatum>>() {
|
||||
@Override
|
||||
protected synchronized NestedIntegerArray<RecalDatum> initialValue() {
|
||||
final NestedIntegerArray<RecalDatum> table = recalibrationTables.makeQualityScoreTable();
|
||||
allThreadLocalQualityScoreTables.add(table);
|
||||
return table;
|
||||
}
|
||||
};
|
||||
|
||||
@Override
|
||||
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
|
||||
final GATKSAMRecord read = recalInfo.getRead();
|
||||
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
|
||||
final NestedIntegerArray<RecalDatum> qualityScoreTable = getThreadLocalQualityScoreTable();
|
||||
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( ! recalInfo.skip(offset) ) {
|
||||
|
||||
for (final EventType eventType : EventType.values()) {
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
final int eventIndex = eventType.index;
|
||||
final byte qual = recalInfo.getQual(eventType, offset);
|
||||
final double isError = recalInfo.getErrorFraction(eventType, offset);
|
||||
|
||||
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a NestedIntegerArray for a QualityScore table specific to this thread
|
||||
* @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate
|
||||
*/
|
||||
private NestedIntegerArray<RecalDatum> getThreadLocalQualityScoreTable() {
|
||||
return threadLocalQualityScoreTables.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finalizeData() {
|
||||
// merge in all of the thread local tables
|
||||
logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables");
|
||||
for ( final NestedIntegerArray<RecalDatum> localTable : allThreadLocalQualityScoreTables ) {
|
||||
recalibrationTables.combineQualityScoreTable(localTable);
|
||||
}
|
||||
allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves
|
||||
|
||||
super.finalizeData();
|
||||
}
|
||||
}
|
||||
|
|
@ -570,11 +570,32 @@ public class GenomeAnalysisEngine {
|
|||
else if(walker instanceof ActiveRegionWalker) {
|
||||
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
|
||||
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
|
||||
}
|
||||
|
||||
switch(argCollection.activeRegionShardType) {
|
||||
case LOCUSSHARD:
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
|
||||
case READSHARD:
|
||||
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
|
||||
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
|
||||
new LegacyReadShardBalancer() :
|
||||
new ReadShardBalancer();
|
||||
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(), readShardBalancer);
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), readShardBalancer);
|
||||
case ACTIVEREGIONSHARD:
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new ActiveRegionShardBalancer());
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer());
|
||||
default:
|
||||
throw new UserException.CommandLineException("Invalid active region shard type.");
|
||||
}
|
||||
}
|
||||
else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) {
|
||||
// Apply special validation to read pair walkers.
|
||||
if(walker instanceof ReadPairWalker) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
|||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
|
|
@ -448,5 +449,14 @@ public class GATKArgumentCollection {
|
|||
@Hidden
|
||||
public boolean generateShadowBCF = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Experimental Active Region Traversal modes
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Argument(fullName = "active_region_traversal_shard_type", shortName = "active_region_traversal_shard_type", doc = "Choose an experimental shard type for active region traversal, instead of the default LocusShard", required = false)
|
||||
public ExperimentalActiveRegionShardType activeRegionShardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* @author Joel Thibault
|
||||
*/
|
||||
public class ActiveRegionShardDataProvider extends ShardDataProvider {
|
||||
final private ReadShardDataProvider readProvider;
|
||||
final private LocusShardDataProvider locusProvider;
|
||||
|
||||
public ActiveRegionShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, StingSAMIterator reads, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard, genomeLocParser, reference, rods); // TODO: necessary?
|
||||
readProvider = new ReadShardDataProvider(shard, genomeLocParser, reads, reference, rods);
|
||||
locusProvider = new LocusShardDataProvider(shard, sourceInfo, genomeLocParser, locus, locusIterator, reference, rods);
|
||||
}
|
||||
|
||||
public ReadShardDataProvider getReadShardDataProvider() {
|
||||
return readProvider;
|
||||
}
|
||||
|
||||
public LocusShardDataProvider getLocusShardDataProvider(LocusIterator iterator) {
|
||||
return locusProvider;
|
||||
}
|
||||
}
|
||||
|
|
@ -22,11 +22,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
|||
*/
|
||||
private final ReadProperties sourceInfo;
|
||||
|
||||
/**
|
||||
* The parser, used to create and build new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
|
||||
*/
|
||||
|
|
@ -45,7 +40,22 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
|||
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard,genomeLocParser,reference,rods);
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.locus = locus;
|
||||
this.locusIterator = locusIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a data provider based on an input provider
|
||||
* Used only by ExperimentalReadShardTraverseActiveRegions
|
||||
* @param dataProvider
|
||||
* @param sourceInfo
|
||||
* @param genomeLocParser
|
||||
* @param locus
|
||||
* @param locusIterator
|
||||
*/
|
||||
public LocusShardDataProvider(ShardDataProvider dataProvider, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator) {
|
||||
super(dataProvider.getShard(),genomeLocParser,dataProvider.getReference(),dataProvider.getReferenceOrderedData());
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.locus = locus;
|
||||
this.locusIterator = locusIterator;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author Joel Thibault
|
||||
*/
|
||||
public class ActiveRegionShard extends ReadShard {
|
||||
public ActiveRegionShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
|
||||
super(parser, readsDataSource, fileSpans, loci, isUnmapped);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
/**
|
||||
* @author Joel Thibault
|
||||
*/
|
||||
public class ActiveRegionShardBalancer extends ReadShardBalancer {
|
||||
// TODO ?
|
||||
}
|
||||
|
|
@ -40,7 +40,9 @@ import java.util.Map;
|
|||
*/
|
||||
public abstract class Shard implements HasGenomeLocation {
|
||||
public enum ShardType {
|
||||
READ, LOCUS
|
||||
READ,
|
||||
LOCUS,
|
||||
ACTIVEREGION // Used only by ExperimentalActiveRegionShardTraverseActiveRegions
|
||||
}
|
||||
|
||||
protected final GenomeLocParser parser; // incredibly annoying!
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.executive;
|
|||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
|
|
@ -11,6 +12,8 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|||
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.traversals.ExperimentalActiveRegionShardTraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.traversals.ExperimentalReadShardTraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
|
@ -78,6 +81,18 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
else if(shard.getShardType() == Shard.ShardType.ACTIVEREGION) {
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
|
||||
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
ShardDataProvider dataProvider = new ActiveRegionShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),getReadIterator(shard),iterator.getLocus(),iterator,reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
accumulator.accumulate(dataProvider,result);
|
||||
dataProvider.close();
|
||||
if ( walker.isDone() ) break;
|
||||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
else {
|
||||
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
|
|
@ -93,6 +108,14 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
else if( traversalEngine instanceof ExperimentalReadShardTraverseActiveRegions ) {
|
||||
final Object result = ((ExperimentalReadShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
else if( traversalEngine instanceof ExperimentalActiveRegionShardTraverseActiveRegions) {
|
||||
final Object result = ((ExperimentalActiveRegionShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
|
||||
Object result = accumulator.finishTraversal();
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ import org.broadinstitute.sting.gatk.traversals.*;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
|
||||
|
|
@ -245,7 +246,12 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
} else if (walker instanceof ReadPairWalker) {
|
||||
return new TraverseReadPairs();
|
||||
} else if (walker instanceof ActiveRegionWalker) {
|
||||
return new TraverseActiveRegions();
|
||||
switch (engine.getArguments().activeRegionShardType) {
|
||||
case LOCUSSHARD: return new TraverseActiveRegions();
|
||||
case READSHARD: return new ExperimentalReadShardTraverseActiveRegions();
|
||||
case ACTIVEREGIONSHARD: return new ExperimentalActiveRegionShardTraverseActiveRegions();
|
||||
default: throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type of ActiveRegionWalker.");
|
||||
}
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ public class GATKReport {
|
|||
* @param numColumns the number of columns in this table
|
||||
*/
|
||||
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
|
||||
addTable(tableName, tableDescription, numColumns, false, false);
|
||||
addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -126,11 +126,10 @@ public class GATKReport {
|
|||
* @param tableName the name of the table
|
||||
* @param tableDescription the description of the table
|
||||
* @param numColumns the number of columns in this table
|
||||
* @param sortByRowID whether to sort the rows by the row ID
|
||||
* @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column
|
||||
* @param sortingWay way to sort table
|
||||
*/
|
||||
public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
|
||||
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns);
|
||||
public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) {
|
||||
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay);
|
||||
tables.put(tableName, table);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -46,8 +46,7 @@ public class GATKReportTable {
|
|||
private final String tableName;
|
||||
private final String tableDescription;
|
||||
|
||||
private final boolean sortByRowID;
|
||||
private final boolean sortByAllColumns;
|
||||
private final TableSortingWay sortingWay;
|
||||
|
||||
private List<Object[]> underlyingData;
|
||||
private final List<GATKReportColumn> columnInfo;
|
||||
|
|
@ -73,6 +72,12 @@ public class GATKReportTable {
|
|||
public int index() { return index; }
|
||||
}
|
||||
|
||||
public enum TableSortingWay {
|
||||
SORT_BY_ROW,
|
||||
SORT_BY_COLUMN,
|
||||
DO_NOT_SORT
|
||||
}
|
||||
|
||||
protected enum TableNameHeaderFields {
|
||||
NAME(2),
|
||||
DESCRIPTION(3);
|
||||
|
|
@ -107,10 +112,7 @@ public class GATKReportTable {
|
|||
tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay)
|
||||
|
||||
// when reading from a file, we do not re-sort the rows
|
||||
sortByRowID = false;
|
||||
|
||||
// when reading from a file, we do not re-sort the rows
|
||||
sortByAllColumns = false;
|
||||
sortingWay = TableSortingWay.DO_NOT_SORT;
|
||||
|
||||
// initialize the data
|
||||
final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]);
|
||||
|
|
@ -181,7 +183,7 @@ public class GATKReportTable {
|
|||
* @param numColumns the number of columns in this table
|
||||
*/
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) {
|
||||
this(tableName, tableDescription, numColumns, true, false);
|
||||
this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -190,10 +192,9 @@ public class GATKReportTable {
|
|||
* @param tableName the name of the table
|
||||
* @param tableDescription the description of the table
|
||||
* @param numColumns the number of columns in this table
|
||||
* @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added)
|
||||
* @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added)
|
||||
* @param sortingWay in what way to sort rows (instead of the order in which they were added)
|
||||
*/
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) {
|
||||
if ( !isValidName(tableName) ) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
|
@ -204,8 +205,7 @@ public class GATKReportTable {
|
|||
|
||||
this.tableName = tableName;
|
||||
this.tableDescription = tableDescription;
|
||||
this.sortByRowID = sortByRowID;
|
||||
this.sortByAllColumns = sortByAllColumns;
|
||||
this.sortingWay = sortingWay;
|
||||
|
||||
underlyingData = new ArrayList<Object[]>(INITITAL_ARRAY_SIZE);
|
||||
columnInfo = new ArrayList<GATKReportColumn>(numColumns);
|
||||
|
|
@ -218,7 +218,7 @@ public class GATKReportTable {
|
|||
* @param tableToCopy
|
||||
*/
|
||||
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
|
||||
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns);
|
||||
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay);
|
||||
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
|
||||
addColumn(column.getColumnName(), column.getFormat());
|
||||
if ( copyData )
|
||||
|
|
@ -569,56 +569,53 @@ public class GATKReportTable {
|
|||
out.println();
|
||||
|
||||
// write the table body
|
||||
if ( sortByAllColumns ) {
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
switch (sortingWay) {
|
||||
case SORT_BY_COLUMN:
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
int result = EQUAL;
|
||||
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
}
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
});
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
} else if ( sortByRowID ) {
|
||||
// make sure that there are exactly the correct number of ID mappings
|
||||
if ( rowIdToIndex.size() != underlyingData.size() )
|
||||
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
|
||||
});
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
break;
|
||||
case SORT_BY_ROW:
|
||||
// make sure that there are exactly the correct number of ID mappings
|
||||
if ( rowIdToIndex.size() != underlyingData.size() )
|
||||
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
|
||||
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
|
||||
}
|
||||
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
|
||||
writeRow(out, underlyingData.get(rowKey.getValue()));
|
||||
} else {
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
}
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
|
||||
}
|
||||
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
|
||||
writeRow(out, underlyingData.get(rowKey.getValue()));
|
||||
break;
|
||||
case DO_NOT_SORT:
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
|
||||
|
|
@ -735,53 +732,47 @@ public class GATKReportTable {
|
|||
}
|
||||
|
||||
private List<Object[]> getOrderedRows() {
|
||||
if ( sortByAllColumns ) {
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
switch (sortingWay) {
|
||||
case SORT_BY_COLUMN:
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
int result = EQUAL;
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
}
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
});
|
||||
return underlyingData;
|
||||
case SORT_BY_ROW:
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
return underlyingData;
|
||||
}
|
||||
});
|
||||
return underlyingData;
|
||||
} else if ( !sortByRowID ) {
|
||||
return underlyingData;
|
||||
|
||||
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
|
||||
for ( final int rowKey : sortedMap.values() )
|
||||
orderedData.add(underlyingData.get(rowKey));
|
||||
|
||||
return orderedData;
|
||||
default:
|
||||
return underlyingData;
|
||||
}
|
||||
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
return underlyingData;
|
||||
}
|
||||
|
||||
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
|
||||
for ( final int rowKey : sortedMap.values() )
|
||||
orderedData.add(underlyingData.get(rowKey));
|
||||
|
||||
return orderedData;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,309 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ExperimentalActiveRegionShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ActiveRegionShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final ActiveRegionShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("ExperimentalActiveRegionShardTraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
ReadShardDataProvider readDataProvider = dataProvider.getReadShardDataProvider();
|
||||
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final ReadView readView = new ReadView(readDataProvider);
|
||||
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
|
||||
|
||||
Shard readShard = readDataProvider.getShard();
|
||||
SAMFileHeader header = readShard.getReadProperties().getHeader();
|
||||
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
|
||||
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
LocusShardDataProvider locusDataProvider = dataProvider.getLocusShardDataProvider(iterator);
|
||||
final LocusView locusView = new AllLocusView(locusDataProvider);
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've move across some interval boundary, restart profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
}
|
||||
|
||||
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
locusDataProvider.close();
|
||||
}
|
||||
|
||||
windowMaker.close();
|
||||
|
||||
updateCumulativeMetrics(readDataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
|
||||
// now process the active regions, where possible
|
||||
boolean emptyQueue = false;
|
||||
sum = processActiveRegions(walker, sum, emptyQueue);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// simple utility functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
}
|
||||
|
||||
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
final LocusView locusView) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
return (RodLocusView)locusView;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// code to handle processing active regions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
if( walker.activeRegionOutStream != null ) {
|
||||
writeActiveRegionsToStream(walker);
|
||||
return sum;
|
||||
} else {
|
||||
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out each active region to the walker activeRegionOutStream
|
||||
*
|
||||
* @param walker
|
||||
*/
|
||||
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||
// Just want to output the active regions to a file, not actually process them
|
||||
for( final ActiveRegion activeRegion : workQueue ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
|
||||
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
|
||||
|
||||
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
|
||||
// TODO can implement parallel traversal here
|
||||
while( workQueue.peekFirst() != null ) {
|
||||
ActiveRegion firstRegion = workQueue.getFirst();
|
||||
final String firstRegionContig = firstRegion.getLocation().getContig();
|
||||
if (emptyQueue || firstRegionContig != lastRegionContig) {
|
||||
sum = processFirstActiveRegion(sum, walker);
|
||||
}
|
||||
else {
|
||||
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop();
|
||||
if (lastRegionStart > firstRegionMaxReadStop) {
|
||||
sum = processFirstActiveRegion( sum, walker );
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the first active region and all remaining reads which overlap
|
||||
*
|
||||
* Remove the first active region from the queue
|
||||
* (NB: some reads associated with this active region may have already been processed)
|
||||
*
|
||||
* Remove all of these reads from the queue
|
||||
* (NB: some may be associated with other active regions)
|
||||
*
|
||||
* @param sum
|
||||
* @param walker
|
||||
* @return
|
||||
*/
|
||||
private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ActiveRegion firstRegion = workQueue.removeFirst();
|
||||
|
||||
GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
|
||||
GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
|
||||
while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
|
||||
(walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
|
||||
if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
ActiveRegion bestRegion = firstRegion;
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
bestRegion.add( firstRead );
|
||||
|
||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||
if( walker.wantsNonPrimaryReads() ) {
|
||||
if( !bestRegion.equals(firstRegion) ) {
|
||||
firstRegion.add(firstRead);
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||
// check for non-primary vs. extended
|
||||
if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check for non-primary vs. extended
|
||||
} else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
if ( walker.wantsNonPrimaryReads() ) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
} else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
|
||||
myReads.removeFirst();
|
||||
firstRead = myReads.peekFirst();
|
||||
firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
}
|
||||
|
||||
logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
|
||||
final M x = walker.map( firstRegion, null );
|
||||
return walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal( final Walker<M,T> walker, T sum) {
|
||||
boolean emptyQueue = true;
|
||||
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,309 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ExperimentalReadShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ReadShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final ReadShardDataProvider readDataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("ExperimentalReadShardTraverseActiveRegions.traverse: Read Shard is %s", readDataProvider));
|
||||
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final ReadView readView = new ReadView(readDataProvider);
|
||||
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
|
||||
|
||||
Shard readShard = readDataProvider.getShard();
|
||||
SAMFileHeader header = readShard.getReadProperties().getHeader();
|
||||
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
|
||||
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
LocusShardDataProvider locusDataProvider = new LocusShardDataProvider(readDataProvider,
|
||||
iterator.getSourceInfo(), engine.getGenomeLocParser(), iterator.getLocus(), iterator);
|
||||
|
||||
final LocusView locusView = new AllLocusView(locusDataProvider);
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've move across some interval boundary, restart profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
}
|
||||
|
||||
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
locusDataProvider.close();
|
||||
}
|
||||
|
||||
windowMaker.close();
|
||||
|
||||
updateCumulativeMetrics(readDataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
|
||||
// now process the active regions, where possible
|
||||
boolean emptyQueue = false;
|
||||
sum = processActiveRegions(walker, sum, emptyQueue);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// simple utility functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
}
|
||||
|
||||
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
final LocusView locusView) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
return (RodLocusView)locusView;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// code to handle processing active regions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
if( walker.activeRegionOutStream != null ) {
|
||||
writeActiveRegionsToStream(walker);
|
||||
return sum;
|
||||
} else {
|
||||
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out each active region to the walker activeRegionOutStream
|
||||
*
|
||||
* @param walker
|
||||
*/
|
||||
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||
// Just want to output the active regions to a file, not actually process them
|
||||
for( final ActiveRegion activeRegion : workQueue ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
|
||||
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
|
||||
|
||||
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
|
||||
// TODO can implement parallel traversal here
|
||||
while( workQueue.peekFirst() != null ) {
|
||||
ActiveRegion firstRegion = workQueue.getFirst();
|
||||
final String firstRegionContig = firstRegion.getLocation().getContig();
|
||||
if (emptyQueue || firstRegionContig != lastRegionContig) {
|
||||
sum = processFirstActiveRegion(sum, walker);
|
||||
}
|
||||
else {
|
||||
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop();
|
||||
if (lastRegionStart > firstRegionMaxReadStop) {
|
||||
sum = processFirstActiveRegion( sum, walker );
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the first active region and all remaining reads which overlap
|
||||
*
|
||||
* Remove the first active region from the queue
|
||||
* (NB: some reads associated with this active region may have already been processed)
|
||||
*
|
||||
* Remove all of these reads from the queue
|
||||
* (NB: some may be associated with other active regions)
|
||||
*
|
||||
* @param sum
|
||||
* @param walker
|
||||
* @return
|
||||
*/
|
||||
private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ActiveRegion firstRegion = workQueue.removeFirst();
|
||||
|
||||
GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
|
||||
GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
|
||||
while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
|
||||
(walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
|
||||
if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
ActiveRegion bestRegion = firstRegion;
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
bestRegion.add( firstRead );
|
||||
|
||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||
if( walker.wantsNonPrimaryReads() ) {
|
||||
if( !bestRegion.equals(firstRegion) ) {
|
||||
firstRegion.add(firstRead);
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||
// check for non-primary vs. extended
|
||||
if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check for non-primary vs. extended
|
||||
} else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
if ( walker.wantsNonPrimaryReads() ) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
} else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
|
||||
myReads.removeFirst();
|
||||
firstRead = myReads.peekFirst();
|
||||
firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
}
|
||||
|
||||
logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
|
||||
final M x = walker.map( firstRegion, null );
|
||||
return walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal( final Walker<M,T> walker, T sum) {
|
||||
boolean emptyQueue = true;
|
||||
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
|
@ -43,7 +43,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
||||
logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = new AllLocusView(dataProvider);
|
||||
|
||||
|
|
@ -227,7 +227,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
|
||||
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||
sum = processActiveRegion( activeRegion, sum, walker );
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -236,9 +236,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
return sum;
|
||||
}
|
||||
|
||||
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
private T processActiveRegion( final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
for( final GATKSAMRecord read : myReads ) {
|
||||
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
||||
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
|
|
@ -278,7 +278,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
activeRegion.add( read );
|
||||
}
|
||||
}
|
||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||
myReads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||
// WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
|
||||
|
||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
||||
|
|
|
|||
|
|
@ -65,7 +65,8 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
|||
@Override
|
||||
public void progress(MapData lastProcessedMap) {
|
||||
if ( lastProcessedMap.refContext != null )
|
||||
printProgress(lastProcessedMap.refContext.getLocus());
|
||||
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
|
||||
printProgress(lastProcessedMap.refContext.getLocus().getStopLocation());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -113,25 +113,39 @@ import java.util.List;
|
|||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSchedulable {
|
||||
/**
|
||||
* all the command line arguments for BQSR and it's covariates
|
||||
*/
|
||||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
/**
|
||||
* When you have nct > 1, BQSR uses nct times more memory to compute its recalibration tables, for efficiency
|
||||
* purposes. If you have many covariates, and therefore are using a lot of memory, you can use this flag
|
||||
* to safely access only one table. There may be some CPU cost, but as long as the table is really big
|
||||
* there should be relatively little CPU costs.
|
||||
*/
|
||||
@Argument(fullName = "lowMemoryMode", shortName="lowMemoryMode", doc="Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false)
|
||||
public boolean lowMemoryMode = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
|
||||
public double BAQGOP = BAQ.DEFAULT_GOP;
|
||||
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
/**
|
||||
* an object that keeps track of the information necessary for quality score quantization
|
||||
*/
|
||||
private QuantizationInfo quantizationInfo;
|
||||
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||
/**
|
||||
* list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||
*/
|
||||
private Covariate[] requestedCovariates;
|
||||
|
||||
private RecalibrationEngine recalibrationEngine;
|
||||
|
||||
private int minimumQToUse;
|
||||
|
||||
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||
|
||||
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
||||
|
||||
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
|
||||
|
|
@ -143,7 +157,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
* Based on the covariates' estimates for initial capacity allocate the data hashmap
|
||||
*/
|
||||
public void initialize() {
|
||||
|
||||
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
|
||||
|
||||
// check for unsupported access
|
||||
|
|
@ -185,29 +198,20 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
|
||||
}
|
||||
|
||||
int numReadGroups = 0;
|
||||
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
|
||||
numReadGroups += header.getReadGroups().size();
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG);
|
||||
|
||||
recalibrationEngine = initializeRecalibrationEngine();
|
||||
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
|
||||
|
||||
initializeRecalibrationEngine();
|
||||
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
|
||||
referenceReader = getToolkit().getReferenceDataSource().getReference();
|
||||
}
|
||||
|
||||
private RecalibrationEngine initializeRecalibrationEngine() {
|
||||
/**
|
||||
* Initialize the recalibration engine
|
||||
*/
|
||||
private void initializeRecalibrationEngine() {
|
||||
int numReadGroups = 0;
|
||||
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
|
||||
numReadGroups += header.getReadGroups().size();
|
||||
|
||||
final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class);
|
||||
try {
|
||||
final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
|
||||
constructor.setAccessible(true);
|
||||
return (RecalibrationEngine)constructor.newInstance();
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + recalibrationEngineClass.getSimpleName());
|
||||
}
|
||||
recalibrationEngine = new RecalibrationEngine(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG, lowMemoryMode);
|
||||
}
|
||||
|
||||
private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) {
|
||||
|
|
@ -501,14 +505,18 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
logger.info("Processed: " + result + " reads");
|
||||
}
|
||||
|
||||
private RecalibrationTables getRecalibrationTable() {
|
||||
return recalibrationEngine.getFinalRecalibrationTables();
|
||||
}
|
||||
|
||||
private void generatePlots() {
|
||||
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
|
||||
if (recalFile != null) {
|
||||
RecalibrationReport report = new RecalibrationReport(recalFile);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates);
|
||||
}
|
||||
else
|
||||
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -517,10 +525,10 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
* generate a quantization map (recalibrated_qual -> quantized_qual)
|
||||
*/
|
||||
private void quantizeQualityScores() {
|
||||
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
|
||||
quantizationInfo = new QuantizationInfo(getRecalibrationTable(), RAC.QUANTIZING_LEVELS);
|
||||
}
|
||||
|
||||
private void generateReport() {
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, getRecalibrationTable(), requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
|
||||
}
|
||||
}
|
||||
|
|
@ -58,7 +58,8 @@ public final class ReadRecalibrationInfo {
|
|||
if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null");
|
||||
if ( skips == null ) throw new IllegalArgumentException("skips cannot be null");
|
||||
if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null");
|
||||
// future: may allow insertionErrors && deletionErrors to be null, so don't enforce
|
||||
if ( insertionErrors == null ) throw new IllegalArgumentException("insertionErrors cannot be null");
|
||||
if ( deletionErrors == null ) throw new IllegalArgumentException("deletionErrors cannot be null");
|
||||
|
||||
this.read = read;
|
||||
this.baseQuals = read.getBaseQualities();
|
||||
|
|
@ -73,8 +74,8 @@ public final class ReadRecalibrationInfo {
|
|||
|
||||
if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + snpErrors.length + " != length " + length);
|
||||
if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection {
|
|||
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||
GATKReportTable argumentsTable;
|
||||
if(SORT_BY_ALL_COLUMNS) {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true);
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,35 +1,90 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
public interface RecalibrationEngine {
|
||||
import java.io.PrintStream;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class RecalibrationEngine {
|
||||
final protected Covariate[] covariates;
|
||||
final private int numReadGroups;
|
||||
final private PrintStream maybeLogStream;
|
||||
final private boolean lowMemoryMode;
|
||||
|
||||
/**
|
||||
* Has finalizeData() been called?
|
||||
*/
|
||||
private boolean finalized = false;
|
||||
|
||||
/**
|
||||
* The final (merged, etc) recalibration tables, suitable for downstream analysis.
|
||||
*/
|
||||
private RecalibrationTables finalRecalibrationTables = null;
|
||||
|
||||
private final List<RecalibrationTables> recalibrationTablesList = new LinkedList<RecalibrationTables>();
|
||||
|
||||
private final ThreadLocal<RecalibrationTables> threadLocalTables = new ThreadLocal<RecalibrationTables>() {
|
||||
private synchronized RecalibrationTables makeAndCaptureTable() {
|
||||
final RecalibrationTables newTable = new RecalibrationTables(covariates, numReadGroups, maybeLogStream);
|
||||
recalibrationTablesList.add(newTable);
|
||||
return newTable;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected synchronized RecalibrationTables initialValue() {
|
||||
if ( lowMemoryMode ) {
|
||||
return recalibrationTablesList.isEmpty() ? makeAndCaptureTable() : recalibrationTablesList.get(0);
|
||||
} else {
|
||||
return makeAndCaptureTable();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Get a recalibration table suitable for updating the underlying RecalDatums
|
||||
*
|
||||
* May return a thread-local version, or a single version, depending on the initialization
|
||||
* arguments of this instance.
|
||||
*
|
||||
* @return updated tables
|
||||
*/
|
||||
protected RecalibrationTables getUpdatableRecalibrationTables() {
|
||||
return threadLocalTables.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the recalibration engine
|
||||
*
|
||||
|
|
@ -40,21 +95,167 @@ public interface RecalibrationEngine {
|
|||
* The engine should collect match and mismatch data into the recalibrationTables data.
|
||||
*
|
||||
* @param covariates an array of the covariates we'll be using in this engine, order matters
|
||||
* @param recalibrationTables the destination recalibrationTables where stats should be collected
|
||||
* @param numReadGroups the number of read groups we should use for the recalibration tables
|
||||
* @param maybeLogStream an optional print stream for logging calls to the nestedhashmap in the recalibration tables
|
||||
*/
|
||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
|
||||
public RecalibrationEngine(final Covariate[] covariates, final int numReadGroups, final PrintStream maybeLogStream, final boolean enableLowMemoryMode) {
|
||||
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
|
||||
if ( numReadGroups < 1 ) throw new IllegalArgumentException("numReadGroups must be >= 1 but got " + numReadGroups);
|
||||
|
||||
this.covariates = covariates.clone();
|
||||
this.numReadGroups = numReadGroups;
|
||||
this.maybeLogStream = maybeLogStream;
|
||||
this.lowMemoryMode = enableLowMemoryMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the recalibration statistics using the information in recalInfo
|
||||
* @param recalInfo data structure holding information about the recalibration values for a single read
|
||||
*/
|
||||
@Requires("recalInfo != null")
|
||||
public void updateDataForRead(final ReadRecalibrationInfo recalInfo);
|
||||
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
|
||||
final GATKSAMRecord read = recalInfo.getRead();
|
||||
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
|
||||
final RecalibrationTables tables = getUpdatableRecalibrationTables();
|
||||
final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();
|
||||
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( ! recalInfo.skip(offset) ) {
|
||||
|
||||
for (final EventType eventType : EventType.values()) {
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
final int eventIndex = eventType.ordinal();
|
||||
final byte qual = recalInfo.getQual(eventType, offset);
|
||||
final double isError = recalInfo.getErrorFraction(eventType, offset);
|
||||
|
||||
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a datum object with one observation and one or zero error
|
||||
*
|
||||
* @param reportedQual the quality score reported by the instrument for this base
|
||||
* @param isError whether or not the observation is an error
|
||||
* @return a new RecalDatum object with the observation and the error
|
||||
*/
|
||||
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1, isError, reportedQual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalize, if appropriate, all derived data in recalibrationTables.
|
||||
*
|
||||
* Called once after all calls to updateDataForRead have been issued.
|
||||
*
|
||||
* Assumes that all of the principal tables (by quality score) have been completely updated,
|
||||
* and walks over this data to create summary data tables like by read group table.
|
||||
*/
|
||||
public void finalizeData();
|
||||
public void finalizeData() {
|
||||
if ( finalized ) throw new IllegalStateException("FinalizeData() has already been called");
|
||||
|
||||
// merge all of the thread-local tables
|
||||
finalRecalibrationTables = mergeThreadLocalRecalibrationTables();
|
||||
|
||||
final NestedIntegerArray<RecalDatum> byReadGroupTable = finalRecalibrationTables.getReadGroupTable();
|
||||
final NestedIntegerArray<RecalDatum> byQualTable = finalRecalibrationTables.getQualityScoreTable();
|
||||
|
||||
// iterate over all values in the qual table
|
||||
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
|
||||
final int rgKey = leaf.keys[0];
|
||||
final int eventIndex = leaf.keys[2];
|
||||
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
|
||||
final RecalDatum qualDatum = leaf.value;
|
||||
|
||||
if ( rgDatum == null ) {
|
||||
// create a copy of qualDatum, and initialize byReadGroup table with it
|
||||
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
|
||||
} else {
|
||||
// combine the qual datum with the existing datum in the byReadGroup table
|
||||
rgDatum.combine(qualDatum);
|
||||
}
|
||||
}
|
||||
|
||||
finalized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge all of the thread local recalibration tables into a single one.
|
||||
*
|
||||
* Reuses one of the recalibration tables to hold the merged table, so this function can only be
|
||||
* called once in the engine.
|
||||
*
|
||||
* @return the merged recalibration table
|
||||
*/
|
||||
@Requires("! finalized")
|
||||
private RecalibrationTables mergeThreadLocalRecalibrationTables() {
|
||||
if ( recalibrationTablesList.isEmpty() ) throw new IllegalStateException("recalibration tables list is empty");
|
||||
|
||||
RecalibrationTables merged = null;
|
||||
for ( final RecalibrationTables table : recalibrationTablesList ) {
|
||||
if ( merged == null )
|
||||
// fast path -- if there's only only one table, so just make it the merged one
|
||||
merged = table;
|
||||
else {
|
||||
merged.combine(table);
|
||||
}
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the final recalibration tables, after finalizeData() has been called
|
||||
*
|
||||
* This returns the finalized recalibration table collected by this engine.
|
||||
*
|
||||
* It is an error to call this function before finalizeData has been called
|
||||
*
|
||||
* @return the finalized recalibration table collected by this engine
|
||||
*/
|
||||
public RecalibrationTables getFinalRecalibrationTables() {
|
||||
if ( ! finalized ) throw new IllegalStateException("Cannot get final recalibration tables until finalizeData() has been called");
|
||||
return finalRecalibrationTables;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
|
||||
final byte qual,
|
||||
final double isError,
|
||||
final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(1L, isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(1L, isError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,144 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
|
||||
protected Covariate[] covariates;
|
||||
protected RecalibrationTables recalibrationTables;
|
||||
|
||||
@Override
|
||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
|
||||
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
|
||||
if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null");
|
||||
|
||||
this.covariates = covariates.clone();
|
||||
this.recalibrationTables = recalibrationTables;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
|
||||
final GATKSAMRecord read = recalInfo.getRead();
|
||||
final EventType eventType = EventType.BASE_SUBSTITUTION;
|
||||
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
|
||||
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( ! recalInfo.skip(offset) ) {
|
||||
final byte qual = recalInfo.getQual(eventType, offset);
|
||||
final double isError = recalInfo.getErrorFraction(eventType, offset);
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a datum object with one observation and one or zero error
|
||||
*
|
||||
* @param reportedQual the quality score reported by the instrument for this base
|
||||
* @param isError whether or not the observation is an error
|
||||
* @return a new RecalDatum object with the observation and the error
|
||||
*/
|
||||
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1L, isError, reportedQual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create derived recalibration data tables
|
||||
*
|
||||
* Assumes that all of the principal tables (by quality score) have been completely updated,
|
||||
* and walks over this data to create summary data tables like by read group table.
|
||||
*/
|
||||
@Override
|
||||
public void finalizeData() {
|
||||
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
|
||||
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
|
||||
|
||||
// iterate over all values in the qual table
|
||||
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
|
||||
final int rgKey = leaf.keys[0];
|
||||
final int eventIndex = leaf.keys[2];
|
||||
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
|
||||
final RecalDatum qualDatum = leaf.value;
|
||||
|
||||
if ( rgDatum == null ) {
|
||||
// create a copy of qualDatum, and initialize byReadGroup table with it
|
||||
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
|
||||
} else {
|
||||
// combine the qual datum with the existing datum in the byReadGroup table
|
||||
rgDatum.combine(qualDatum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
|
||||
final byte qual,
|
||||
final double isError,
|
||||
final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(1, isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(1, isError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
|
|||
|
||||
public void initialize() {
|
||||
report = new GATKReport();
|
||||
report.addTable(reportName, reportDescription, 6, true, false);
|
||||
report.addTable(reportName, reportDescription, 6);
|
||||
table = report.getTable(reportName);
|
||||
table.addColumn("readgroup");
|
||||
table.addColumn("cycle");
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ public class VariantEvalReportWriter {
|
|||
// create the table
|
||||
final String tableName = ve.getSimpleName();
|
||||
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
|
||||
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false);
|
||||
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()));
|
||||
|
||||
// grab the table, and add the columns we need to it
|
||||
final GATKReportTable table = report.getTable(tableName);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,11 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
private final GenomeLocParser genomeLocParser;
|
||||
public final boolean isActive;
|
||||
|
||||
// maximum stop position of all reads with start position in this active region
|
||||
// Used only by ExperimentalReadShardTraverseActiveRegions
|
||||
// NB: these reads may not be associated with this active region!
|
||||
private int maxReadStop;
|
||||
|
||||
public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) {
|
||||
this.activeRegionLoc = activeRegionLoc;
|
||||
this.isActive = isActive;
|
||||
|
|
@ -33,6 +38,7 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
this.extension = extension;
|
||||
extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
|
||||
fullExtentReferenceLoc = extendedLoc;
|
||||
maxReadStop = activeRegionLoc.getStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -93,6 +99,18 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
|
||||
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
|
||||
|
||||
public void setMaxReadStop(int maxReadStop) {
|
||||
this.maxReadStop = maxReadStop;
|
||||
}
|
||||
|
||||
public int getMaxReadStop() {
|
||||
return maxReadStop;
|
||||
}
|
||||
|
||||
public int getExtendedMaxReadStop() {
|
||||
return maxReadStop + extension;
|
||||
}
|
||||
|
||||
public boolean equalExceptReads(final ActiveRegion other) {
|
||||
if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false;
|
||||
if ( isActive != other.isActive ) return false;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,14 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 1/2/13
|
||||
* Time: 4:59 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public enum ExperimentalActiveRegionShardType {
|
||||
LOCUSSHARD, // default/legacy type
|
||||
READSHARD,
|
||||
ACTIVEREGIONSHARD
|
||||
}
|
||||
|
|
@ -160,6 +160,13 @@ public class ProgressMeter {
|
|||
public ProgressMeter(final File performanceLogFile,
|
||||
final String processingUnitName,
|
||||
final GenomeLocSortedSet processingIntervals) {
|
||||
this(performanceLogFile, processingUnitName, processingIntervals, ProgressMeterDaemon.DEFAULT_POLL_FREQUENCY_MILLISECONDS);
|
||||
}
|
||||
|
||||
protected ProgressMeter(final File performanceLogFile,
|
||||
final String processingUnitName,
|
||||
final GenomeLocSortedSet processingIntervals,
|
||||
final long pollingFrequency) {
|
||||
if ( processingUnitName == null ) throw new IllegalArgumentException("processingUnitName cannot be null");
|
||||
if ( processingIntervals == null ) throw new IllegalArgumentException("Target intervals cannot be null");
|
||||
|
||||
|
|
@ -184,10 +191,14 @@ public class ProgressMeter {
|
|||
targetSizeInBP = processingIntervals.coveredSize();
|
||||
|
||||
// start up the timer
|
||||
progressMeterDaemon = new ProgressMeterDaemon(this);
|
||||
progressMeterDaemon = new ProgressMeterDaemon(this, pollingFrequency);
|
||||
start();
|
||||
}
|
||||
|
||||
public ProgressMeterDaemon getProgressMeterDaemon() {
|
||||
return progressMeterDaemon;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start up the progress meter, printing initialization message and starting up the
|
||||
* daemon thread for periodic printing.
|
||||
|
|
@ -223,11 +234,13 @@ public class ProgressMeter {
|
|||
* the progress itself. A separate printing daemon periodically polls the meter to print out
|
||||
* progress
|
||||
*
|
||||
* @param loc Current location, can be null if you are at the end of the processing unit
|
||||
* @param loc Current location, can be null if you are at the end of the processing unit. Must
|
||||
* have size == 1 (cannot be multiple bases in size).
|
||||
* @param nTotalRecordsProcessed the total number of records we've processed
|
||||
*/
|
||||
public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
|
||||
if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
|
||||
if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc);
|
||||
|
||||
// weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup)
|
||||
this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
|
||||
|
|
|
|||
|
|
@ -8,10 +8,20 @@ package org.broadinstitute.sting.utils.progressmeter;
|
|||
* Time: 9:16 PM
|
||||
*/
|
||||
public final class ProgressMeterDaemon extends Thread {
|
||||
public final static long DEFAULT_POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
|
||||
|
||||
/**
|
||||
* How frequently should we poll and print progress?
|
||||
*/
|
||||
private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
|
||||
private final long pollFrequencyMilliseconds;
|
||||
|
||||
/**
|
||||
* How long are we waiting between print progress calls are issued?
|
||||
* @return the time in milliseconds between progress meter calls
|
||||
*/
|
||||
private long getPollFrequencyMilliseconds() {
|
||||
return pollFrequencyMilliseconds;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are we to continue periodically printing status, or should we shut down?
|
||||
|
|
@ -27,13 +37,20 @@ public final class ProgressMeterDaemon extends Thread {
|
|||
* Create a new ProgressMeterDaemon printing progress for meter
|
||||
* @param meter the progress meter to print progress of
|
||||
*/
|
||||
public ProgressMeterDaemon(final ProgressMeter meter) {
|
||||
public ProgressMeterDaemon(final ProgressMeter meter, final long pollFrequencyMilliseconds) {
|
||||
if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
|
||||
if ( pollFrequencyMilliseconds <= 0 ) throw new IllegalArgumentException("pollFrequencyMilliseconds must be greater than 0 but got " + pollFrequencyMilliseconds);
|
||||
|
||||
this.meter = meter;
|
||||
this.pollFrequencyMilliseconds = pollFrequencyMilliseconds;
|
||||
setDaemon(true);
|
||||
setName("ProgressMeterDaemon");
|
||||
}
|
||||
|
||||
public ProgressMeterDaemon(final ProgressMeter meter) {
|
||||
this(meter, DEFAULT_POLL_FREQUENCY_MILLISECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this daemon thread to shutdown at the next opportunity, as the progress
|
||||
* metering is complete.
|
||||
|
|
@ -42,6 +59,14 @@ public final class ProgressMeterDaemon extends Thread {
|
|||
this.done = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this daemon thread done?
|
||||
* @return true if done, false otherwise
|
||||
*/
|
||||
public boolean isDone() {
|
||||
return done;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start up the ProgressMeterDaemon, polling every tens of seconds to print, if
|
||||
* necessary, the provided progress meter. Never exits until the JVM is complete,
|
||||
|
|
@ -51,7 +76,7 @@ public final class ProgressMeterDaemon extends Thread {
|
|||
while (! done) {
|
||||
meter.printProgress(false);
|
||||
try {
|
||||
Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
|
||||
Thread.sleep(getPollFrequencyMilliseconds());
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
|
||||
private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) {
|
||||
final Double cached = globalDeltaQs.get(rgKey, errorModel.index);
|
||||
final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal());
|
||||
|
||||
if ( TEST_CACHING ) {
|
||||
final double calcd = calculateGlobalDeltaQ(rgKey, errorModel);
|
||||
|
|
@ -210,7 +210,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
|
||||
private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) {
|
||||
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index);
|
||||
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal());
|
||||
|
||||
if ( TEST_CACHING ) {
|
||||
final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey);
|
||||
|
|
@ -240,7 +240,7 @@ public class BaseRecalibration {
|
|||
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
|
||||
double result = 0.0;
|
||||
|
||||
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index);
|
||||
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
|
||||
|
||||
if (empiricalQualRG != null) {
|
||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||
|
|
@ -254,7 +254,7 @@ public class BaseRecalibration {
|
|||
private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
||||
double result = 0.0;
|
||||
|
||||
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index);
|
||||
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal());
|
||||
if (empiricalQualQS != null) {
|
||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
|
|
@ -287,7 +287,7 @@ public class BaseRecalibration {
|
|||
final double globalDeltaQ,
|
||||
final double deltaQReported,
|
||||
final byte qualFromRead) {
|
||||
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index);
|
||||
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal());
|
||||
if (empiricalQualCO != null) {
|
||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported);
|
||||
|
|
|
|||
|
|
@ -1,41 +1,39 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
public enum EventType {
|
||||
BASE_SUBSTITUTION(0, "M", "Base Substitution"),
|
||||
BASE_INSERTION(1, "I", "Base Insertion"),
|
||||
BASE_DELETION(2, "D", "Base Deletion");
|
||||
BASE_SUBSTITUTION("M", "Base Substitution"),
|
||||
BASE_INSERTION("I", "Base Insertion"),
|
||||
BASE_DELETION("D", "Base Deletion");
|
||||
|
||||
public final int index;
|
||||
private final String representation;
|
||||
private final String longRepresentation;
|
||||
|
||||
private EventType(int index, String representation, String longRepresentation) {
|
||||
this.index = index;
|
||||
private EventType(String representation, String longRepresentation) {
|
||||
this.representation = representation;
|
||||
this.longRepresentation = longRepresentation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the EventType corresponding to its ordinal index
|
||||
* @param index an ordinal index
|
||||
* @return the event type corresponding to ordinal index
|
||||
*/
|
||||
public static EventType eventFrom(int index) {
|
||||
switch (index) {
|
||||
case 0:
|
||||
return BASE_SUBSTITUTION;
|
||||
case 1:
|
||||
return BASE_INSERTION;
|
||||
case 2:
|
||||
return BASE_DELETION;
|
||||
default:
|
||||
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
|
||||
}
|
||||
return EventType.values()[index];
|
||||
}
|
||||
|
||||
public static EventType eventFrom(String event) {
|
||||
|
||||
/**
|
||||
* Get the EventType with short string representation
|
||||
* @throws IllegalArgumentException if representation doesn't correspond to one of EventType
|
||||
* @param representation short string representation of the event
|
||||
* @return an EventType
|
||||
*/
|
||||
public static EventType eventFrom(String representation) {
|
||||
for (EventType eventType : EventType.values())
|
||||
if (eventType.representation.equals(event))
|
||||
if (eventType.representation.equals(representation))
|
||||
return eventType;
|
||||
|
||||
throw new ReviewedStingException(String.format("Event %s does not exist.", event));
|
||||
throw new IllegalArgumentException(String.format("Event %s does not exist.", representation));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -67,10 +67,10 @@ public class QuantizationInfo {
|
|||
return quantizationLevels;
|
||||
}
|
||||
|
||||
public GATKReportTable generateReportTable(boolean sortBycols) {
|
||||
public GATKReportTable generateReportTable(boolean sortByCols) {
|
||||
GATKReportTable quantizedTable;
|
||||
if(sortBycols) {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true);
|
||||
if(sortByCols) {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,9 +68,9 @@ public class ReadCovariates {
|
|||
* @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
|
||||
*/
|
||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
||||
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
|
||||
keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
|
||||
keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -81,11 +81,11 @@ public class ReadCovariates {
|
|||
* @return
|
||||
*/
|
||||
public int[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
return keys[errorModel.index][readPosition];
|
||||
return keys[errorModel.ordinal()][readPosition];
|
||||
}
|
||||
|
||||
public int[][] getKeySet(final EventType errorModel) {
|
||||
return keys[errorModel.index];
|
||||
return keys[errorModel.ordinal()];
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
|
@ -94,17 +94,9 @@ public class ReadCovariates {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
protected int[][] getMismatchesKeySet() {
|
||||
return keys[EventType.BASE_SUBSTITUTION.index];
|
||||
}
|
||||
|
||||
protected int[][] getInsertionsKeySet() {
|
||||
return keys[EventType.BASE_INSERTION.index];
|
||||
}
|
||||
|
||||
protected int[][] getDeletionsKeySet() {
|
||||
return keys[EventType.BASE_DELETION.index];
|
||||
}
|
||||
protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); }
|
||||
protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); }
|
||||
protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); }
|
||||
|
||||
protected int[] getMismatchesKeySet(final int readPosition) {
|
||||
return getKeySet(readPosition, EventType.BASE_SUBSTITUTION);
|
||||
|
|
|
|||
|
|
@ -287,7 +287,7 @@ public class RecalUtils {
|
|||
final GATKReportTable reportTable;
|
||||
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
if(sortByCols) {
|
||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true);
|
||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ public class RecalibrationReport {
|
|||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
|
||||
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempCOVarray[3] = event.index;
|
||||
tempCOVarray[3] = event.ordinal();
|
||||
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
}
|
||||
|
|
@ -152,7 +152,7 @@ public class RecalibrationReport {
|
|||
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempQUALarray[2] = event.index;
|
||||
tempQUALarray[2] = event.ordinal();
|
||||
|
||||
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||
}
|
||||
|
|
@ -169,7 +169,7 @@ public class RecalibrationReport {
|
|||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempRGarray[1] = event.index;
|
||||
tempRGarray[1] = event.ordinal();
|
||||
|
||||
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -123,12 +123,16 @@ public final class RecalibrationTables {
|
|||
}
|
||||
|
||||
/**
|
||||
* Merge in the quality score table information from qualityScoreTable into this
|
||||
* recalibration table's quality score table.
|
||||
*
|
||||
* @param qualityScoreTable the quality score table we want to merge in
|
||||
* Merge all of the tables from toMerge into into this set of tables
|
||||
*/
|
||||
public void combineQualityScoreTable(final NestedIntegerArray<RecalDatum> qualityScoreTable) {
|
||||
RecalUtils.combineTables(getQualityScoreTable(), qualityScoreTable);
|
||||
public void combine(final RecalibrationTables toMerge) {
|
||||
if ( numTables() != toMerge.numTables() )
|
||||
throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes");
|
||||
|
||||
for ( int i = 0; i < numTables(); i++ ) {
|
||||
final NestedIntegerArray<RecalDatum> myTable = this.getTable(i);
|
||||
final NestedIntegerArray<RecalDatum> otherTable = toMerge.getTable(i);
|
||||
RecalUtils.combineTables(myTable, otherTable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,13 @@ import org.testng.annotations.Test;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Random;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
public class GATKReportUnitTest extends BaseTest {
|
||||
@Test
|
||||
|
|
@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest {
|
|||
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
|
||||
}
|
||||
|
||||
private GATKReportTable getTableWithRandomValues() {
|
||||
Random number = new Random(123L);
|
||||
final int VALUESRANGE = 10;
|
||||
|
||||
GATKReport report = GATKReport.newSimpleReport("TableName", "col1", "col2", "col3");
|
||||
GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN );
|
||||
|
||||
final int NUMROWS = 100;
|
||||
for (int x = 0; x < NUMROWS; x++) {
|
||||
report.addRow(number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE));
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSortingByColumn() {
|
||||
Assert.assertEquals(isSorted(getTableWithRandomValues()), true);
|
||||
}
|
||||
|
||||
private boolean isSorted(GATKReportTable table) {
|
||||
boolean result = true;
|
||||
File testingSortingTableFile = new File("testSortingFile.txt");
|
||||
|
||||
try {
|
||||
// Connect print stream to the output stream
|
||||
PrintStream ps = new PrintStream(testingSortingTableFile);
|
||||
table.write(ps);
|
||||
ps.close();
|
||||
}
|
||||
catch (Exception e){
|
||||
System.err.println ("Error: " + e.getMessage());
|
||||
}
|
||||
|
||||
ArrayList<int[]> rows = new ArrayList<int[]>();
|
||||
try {
|
||||
// Open the file
|
||||
FileInputStream fStream = new FileInputStream(testingSortingTableFile);
|
||||
// Get the object of DataInputStream
|
||||
DataInputStream in = new DataInputStream(fStream);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
String strLine;
|
||||
//Read File Line By Line
|
||||
while ((strLine = br.readLine()) != null) {
|
||||
|
||||
String[] parts = strLine.split(" ");
|
||||
int l = parts.length;
|
||||
int[] row = new int[l];
|
||||
for(int n = 0; n < l; n++) {
|
||||
row[n] = Integer.parseInt(parts[n]);
|
||||
}
|
||||
rows.add(row);
|
||||
}
|
||||
//Close the input stream
|
||||
in.close();
|
||||
} catch (Exception e){//Catch exception if any
|
||||
System.err.println("Error: " + e.getMessage());
|
||||
}
|
||||
for (int x = 1; x < rows.size() && result; x++) {
|
||||
result = checkRowOrder(rows.get(x - 1), rows.get(x));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private boolean checkRowOrder(int[] row1, int[] row2) {
|
||||
int l = row1.length;
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
|
||||
for(int x = 0; x < l && ( result <= EQUAL); x++) {
|
||||
result = ((Integer)row1[x]).compareTo(row2[x]);
|
||||
}
|
||||
if (result <= EQUAL) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private GATKReportTable makeBasicTable() {
|
||||
GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
|
||||
GATKReportTable table = report.getTable("TableName");
|
||||
|
|
@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest {
|
|||
table.set("RZ", "SomeFloat", 535646345.657453464576);
|
||||
table.set("RZ", "TrueFalse", true);
|
||||
|
||||
report1.addTable("Table3", "blah", 1, true, false);
|
||||
report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW);
|
||||
report1.getTable("Table3").addColumn("a");
|
||||
report1.getTable("Table3").addRowIDMapping("q", 2);
|
||||
report1.getTable("Table3").addRowIDMapping("5", 3);
|
||||
|
|
|
|||
|
|
@ -3,10 +3,16 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
import com.google.java.contract.PreconditionError;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -15,7 +21,6 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -28,6 +33,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.TestException;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -95,7 +101,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
|
||||
private final TraverseActiveRegions<Integer, Integer> traverse = new TraverseActiveRegions<Integer, Integer>();
|
||||
private final ExperimentalReadShardTraverseActiveRegions<Integer, Integer> readShardTraverse = new ExperimentalReadShardTraverseActiveRegions<Integer, Integer>();
|
||||
private final ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer> activeRegionShardTraverse = new ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer>();
|
||||
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
|
|
@ -106,21 +114,22 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
private static final String testBAM = "TraverseActiveRegionsUnitTest.bam";
|
||||
private static final String testBAI = "TraverseActiveRegionsUnitTest.bai";
|
||||
|
||||
private static final ExperimentalActiveRegionShardType shardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
|
||||
|
||||
@BeforeClass
|
||||
private void init() throws FileNotFoundException {
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
||||
// TODO: test shard boundaries
|
||||
// TODO: reads with indels
|
||||
// TODO: reads which span many regions
|
||||
// TODO: reads which are partially between intervals (in/outside extension)
|
||||
// TODO: duplicate reads
|
||||
|
||||
// TODO: read at the end of a contig
|
||||
// TODO: reads which are completely outside intervals but within extension
|
||||
// TODO: test the extension itself
|
||||
|
||||
// TODO: unmapped reads
|
||||
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
|
||||
|
|
@ -142,6 +151,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050));
|
||||
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
||||
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
||||
reads.add(buildSAMRecord("shard_boundary_1_pre", "1", 16300, 16385));
|
||||
reads.add(buildSAMRecord("shard_boundary_1_post", "1", 16384, 16400));
|
||||
reads.add(buildSAMRecord("shard_boundary_equal", "1", 16355, 16414));
|
||||
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
|
||||
|
||||
createBAM(reads);
|
||||
|
|
@ -153,7 +165,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
File indexFile = new File(testBAI);
|
||||
indexFile.deleteOnExit();
|
||||
|
||||
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile);
|
||||
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, outFile);
|
||||
for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) {
|
||||
out.addAlignment(read);
|
||||
}
|
||||
|
|
@ -171,8 +183,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
|
||||
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
|
||||
traverse(walker, dataProvider, 0);
|
||||
activeIntervals.addAll(walker.isActiveCalls);
|
||||
}
|
||||
|
||||
|
|
@ -272,6 +284,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -286,6 +301,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
verifyReadMapping(region, "boundary_equal", "boundary_1_post");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
|
||||
verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
|
@ -309,6 +330,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -323,6 +347,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
|
@ -347,6 +377,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -361,6 +394,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
|
@ -382,10 +421,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
|
||||
traverse(walker, dataProvider, 0);
|
||||
|
||||
t.endTraversal(walker, 0);
|
||||
endTraversal(walker, 0);
|
||||
|
||||
return walker.mappedActiveRegions;
|
||||
}
|
||||
|
|
@ -429,6 +468,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
|
||||
header.setSequenceDictionary(dictionary);
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
GATKSAMRecord record = new GATKSAMRecord(header);
|
||||
|
||||
record.setReadName(readName);
|
||||
|
|
@ -445,10 +485,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
return record;
|
||||
}
|
||||
|
||||
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
|
||||
private List<ShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
t.initialize(engine);
|
||||
GATKArgumentCollection arguments = new GATKArgumentCollection();
|
||||
arguments.activeRegionShardType = shardType; // make explicit
|
||||
engine.setArguments(arguments);
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags());
|
||||
|
|
@ -456,13 +498,65 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
|
||||
SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser);
|
||||
|
||||
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
|
||||
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
List<ShardDataProvider> providers = new ArrayList<ShardDataProvider>();
|
||||
|
||||
switch (shardType) {
|
||||
case LOCUSSHARD:
|
||||
traverse.initialize(engine);
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
|
||||
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case READSHARD:
|
||||
readShardTraverse.initialize(engine);
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ReadShardBalancer())) {
|
||||
providers.add(new ReadShardDataProvider(shard, genomeLocParser, shard.iterator(), reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
break;
|
||||
case ACTIVEREGIONSHARD:
|
||||
activeRegionShardTraverse.initialize(engine);
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
|
||||
providers.add(new ActiveRegionShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, shard.iterator(), window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
}
|
||||
break;
|
||||
default: throw new TestException("Invalid shard type");
|
||||
}
|
||||
|
||||
return providers;
|
||||
}
|
||||
|
||||
private void traverse(DummyActiveRegionWalker walker, ShardDataProvider dataProvider, int i) {
|
||||
switch (shardType) {
|
||||
case LOCUSSHARD:
|
||||
traverse.traverse(walker, (LocusShardDataProvider) dataProvider, i);
|
||||
break;
|
||||
case READSHARD:
|
||||
readShardTraverse.traverse(walker, (ReadShardDataProvider) dataProvider, i);
|
||||
break;
|
||||
case ACTIVEREGIONSHARD:
|
||||
activeRegionShardTraverse.traverse(walker, (ActiveRegionShardDataProvider) dataProvider, i);
|
||||
break;
|
||||
default: throw new TestException("Invalid shard type");
|
||||
}
|
||||
}
|
||||
|
||||
private void endTraversal(DummyActiveRegionWalker walker, int i) {
|
||||
switch (shardType) {
|
||||
case LOCUSSHARD:
|
||||
traverse.endTraversal(walker, i);
|
||||
break;
|
||||
case READSHARD:
|
||||
readShardTraverse.endTraversal(walker, i);
|
||||
break;
|
||||
case ACTIVEREGIONSHARD:
|
||||
activeRegionShardTraverse.endTraversal(walker, i);
|
||||
break;
|
||||
default: throw new TestException("Invalid shard type");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
|
||||
public final class ReadRecalibrationInfoUnitTest extends BaseTest {
|
||||
@DataProvider(name = "InfoProvider")
|
||||
public Object[][] createCombineTablesProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int readLength: Arrays.asList(10, 100, 1000) ) {
|
||||
for ( final boolean includeIndelErrors : Arrays.asList(true, false) ) {
|
||||
tests.add(new Object[]{readLength, includeIndelErrors});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "InfoProvider")
|
||||
public void testReadInfo(final int readLength, final boolean includeIndelErrors) {
|
||||
final ReadCovariates covariates = new ReadCovariates(readLength, 2);
|
||||
|
||||
final byte[] bases = new byte[readLength];
|
||||
final byte[] baseQuals = new byte[readLength];
|
||||
final byte[] insertionQuals = new byte[readLength];
|
||||
final byte[] deletionQuals = new byte[readLength];
|
||||
final boolean[] skips = new boolean[readLength];
|
||||
final double[] snpErrors = new double[readLength];
|
||||
final double[] insertionErrors = new double[readLength];
|
||||
final double[] deletionsErrors = new double[readLength];
|
||||
for ( int i = 0; i < readLength; i++ ) {
|
||||
bases[i] = 'A';
|
||||
baseQuals[i] = (byte)(i % SAMUtils.MAX_PHRED_SCORE);
|
||||
insertionQuals[i] = (byte)((i+1) % SAMUtils.MAX_PHRED_SCORE);
|
||||
deletionQuals[i] = (byte)((i+2) % SAMUtils.MAX_PHRED_SCORE);
|
||||
skips[i] = i % 2 == 0;
|
||||
snpErrors[i] = 1.0 / (i+1);
|
||||
insertionErrors[i] = 0.5 / (i+1);
|
||||
deletionsErrors[i] = 0.3 / (i+1);
|
||||
}
|
||||
|
||||
final EnumMap<EventType, double[]> errors = new EnumMap<EventType, double[]>(EventType.class);
|
||||
errors.put(EventType.BASE_SUBSTITUTION, snpErrors);
|
||||
errors.put(EventType.BASE_INSERTION, insertionErrors);
|
||||
errors.put(EventType.BASE_DELETION, deletionsErrors);
|
||||
|
||||
final EnumMap<EventType, byte[]> quals = new EnumMap<EventType, byte[]>(EventType.class);
|
||||
quals.put(EventType.BASE_SUBSTITUTION, baseQuals);
|
||||
quals.put(EventType.BASE_INSERTION, insertionQuals);
|
||||
quals.put(EventType.BASE_DELETION, deletionQuals);
|
||||
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, baseQuals, readLength + "M");
|
||||
if ( includeIndelErrors ) {
|
||||
read.setBaseQualities(insertionQuals, EventType.BASE_INSERTION);
|
||||
read.setBaseQualities(deletionQuals, EventType.BASE_DELETION);
|
||||
}
|
||||
|
||||
final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skips, snpErrors, insertionErrors, deletionsErrors);
|
||||
|
||||
Assert.assertEquals(info.getCovariatesValues(), covariates);
|
||||
Assert.assertEquals(info.getRead(), read);
|
||||
|
||||
for ( int i = 0; i < readLength; i++ ) {
|
||||
Assert.assertEquals(info.skip(i), skips[i]);
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
Assert.assertEquals(info.getErrorFraction(et, i), errors.get(et)[i]);
|
||||
final byte expectedQual = et == EventType.BASE_SUBSTITUTION || includeIndelErrors ? quals.get(et)[i]: GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL;
|
||||
Assert.assertEquals(info.getQual(et, i), expectedQual);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.progressmeter;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* UnitTests for the ProgressMeterDaemon
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 11:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProgressMeterDaemonUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
|
||||
}
|
||||
|
||||
// capture and count calls to progress
|
||||
private class TestingProgressMeter extends ProgressMeter {
|
||||
final List<Long> progressCalls = new LinkedList<Long>();
|
||||
|
||||
private TestingProgressMeter(final long poll) {
|
||||
super(null, "test", new GenomeLocSortedSet(genomeLocParser), poll);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected synchronized void printProgress(boolean mustPrint) {
|
||||
progressCalls.add(System.currentTimeMillis());
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PollingData")
|
||||
public Object[][] makePollingData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( final int ticks : Arrays.asList(1, 5, 10) ) {
|
||||
for ( final int poll : Arrays.asList(10, 100) ) {
|
||||
tests.add(new Object[]{poll, ticks});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PollingData", invocationCount = 10, successPercentage = 90)
|
||||
public void testMe(final long poll, final int ticks) throws InterruptedException {
|
||||
final TestingProgressMeter meter = new TestingProgressMeter(poll);
|
||||
final ProgressMeterDaemon daemon = meter.getProgressMeterDaemon();
|
||||
Assert.assertTrue(daemon.isDaemon());
|
||||
|
||||
Assert.assertFalse(daemon.isDone());
|
||||
Thread.sleep(ticks * poll);
|
||||
Assert.assertFalse(daemon.isDone());
|
||||
|
||||
daemon.done();
|
||||
Assert.assertTrue(daemon.isDone());
|
||||
|
||||
Assert.assertEquals(meter.progressCalls.size(), ticks,
|
||||
"Expected " + ticks + " progress calls from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.progressmeter;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* UnitTests for the ProgressMeterData
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 11:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProgressMeterDataUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testBasic() {
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getElapsedSeconds(), 1.0);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getUnitsProcessed(), 2);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getBpProcessed(), 3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFraction() {
|
||||
final double TOL = 1e-3;
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(10), 0.1, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(10), 0.2, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(100), 0.01, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(100), 0.02, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(0), 1.0, TOL);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSecondsPerBP() {
|
||||
final double TOL = 1e-3;
|
||||
final long M = 1000000;
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, M).secondsPerMillionBP(), 1.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, M/10).secondsPerMillionBP(), 10.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(2.0, 1, M).secondsPerMillionBP(), 2.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 0).secondsPerMillionBP(), 1e6, TOL);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSecondsPerElement() {
|
||||
final double TOL = 1e-3;
|
||||
final long M = 1000000;
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, M, 1).secondsPerMillionElements(), 1.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, M/10, 1).secondsPerMillionElements(), 10.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(2.00, M, 1).secondsPerMillionElements(), 2.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 0, 1).secondsPerMillionElements(), 1e6, TOL);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public final class EventTypeUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testEventTypes() {
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
Assert.assertNotNull(et.toString());
|
||||
Assert.assertNotNull(et.prettyPrint());
|
||||
Assert.assertFalse("".equals(et.toString()));
|
||||
Assert.assertFalse("".equals(et.prettyPrint()));
|
||||
Assert.assertEquals(EventType.eventFrom(et.ordinal()), et);
|
||||
Assert.assertEquals(EventType.eventFrom(et.toString()), et);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEventTypesEnumItself() {
|
||||
final Set<String> shortReps = new HashSet<String>();
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
Assert.assertFalse(shortReps.contains(et.toString()), "Short representative for EventType has duplicates for " + et);
|
||||
shortReps.add(et.toString());
|
||||
}
|
||||
Assert.assertEquals(shortReps.size(), EventType.values().length, "Short representatives for EventType aren't unique");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testBadString() {
|
||||
EventType.eventFrom("asdfhalsdjfalkjsdf");
|
||||
}
|
||||
}
|
||||
|
|
@ -20,9 +20,7 @@ public class RecalibrationReportUnitTest {
|
|||
private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
||||
final Random random = new Random();
|
||||
final int nObservations = random.nextInt(maxObservations);
|
||||
int nErrors = random.nextInt(maxErrors);
|
||||
while ( nErrors > nObservations )
|
||||
nErrors = random.nextInt(maxErrors);
|
||||
final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
|
||||
return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
|
||||
}
|
||||
|
|
@ -92,14 +90,14 @@ public class RecalibrationReportUnitTest {
|
|||
final int[] covariates = rc.getKeySet(offset, errorMode);
|
||||
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
|
||||
|
||||
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index);
|
||||
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
|
||||
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
|
||||
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
|
||||
nKeys += 2;
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j];
|
||||
if ( covValue >= 0 ) {
|
||||
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.index);
|
||||
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
|
||||
nKeys++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue