diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java deleted file mode 100644 index 255f1fd05..000000000 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ /dev/null @@ -1,99 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; -import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.sting.utils.recalibration.EventType; -import org.broadinstitute.sting.utils.recalibration.ReadCovariates; -import org.broadinstitute.sting.utils.recalibration.RecalDatum; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.LinkedList; -import java.util.List; - -public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { - private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class); - - final List> allThreadLocalQualityScoreTables = new LinkedList>(); - private ThreadLocal> threadLocalQualityScoreTables = new ThreadLocal>() { - @Override - protected synchronized NestedIntegerArray initialValue() { - final NestedIntegerArray table = recalibrationTables.makeQualityScoreTable(); - allThreadLocalQualityScoreTables.add(table); - return table; - } - }; - - @Override - public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) { - final GATKSAMRecord read = recalInfo.getRead(); - final ReadCovariates readCovariates = recalInfo.getCovariatesValues(); - final NestedIntegerArray qualityScoreTable = getThreadLocalQualityScoreTable(); - - for( int offset = 0; offset < read.getReadBases().length; offset++ ) { - if( ! recalInfo.skip(offset) ) { - - for (final EventType eventType : EventType.values()) { - final int[] keys = readCovariates.getKeySet(offset, eventType); - final int eventIndex = eventType.index; - final byte qual = recalInfo.getQual(eventType, offset); - final double isError = recalInfo.getErrorFraction(eventType, offset); - - incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex); - - for (int i = 2; i < covariates.length; i++) { - if (keys[i] < 0) - continue; - - incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex); - } - } - } - } - } - - /** - * Get a NestedIntegerArray for a QualityScore table specific to this thread - * @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate - */ - private NestedIntegerArray getThreadLocalQualityScoreTable() { - return threadLocalQualityScoreTables.get(); - } - - @Override - public void finalizeData() { - // merge in all of the thread local tables - logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables"); - for ( final NestedIntegerArray localTable : allThreadLocalQualityScoreTables ) { - recalibrationTables.combineQualityScoreTable(localTable); - } - allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves - - super.finalizeData(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 1187039bb..bee25dc2f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -570,11 +570,32 @@ public class GenomeAnalysisEngine { else if(walker instanceof ActiveRegionWalker) { if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately."); - if(intervals == null) - return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer()); - else - return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer()); - } + + switch(argCollection.activeRegionShardType) { + case LOCUSSHARD: + if(intervals == null) + return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer()); + else + return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer()); + case READSHARD: + // Use the legacy ReadShardBalancer if legacy downsampling is enabled + ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ? + new LegacyReadShardBalancer() : + new ReadShardBalancer(); + + if(intervals == null) + return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(), readShardBalancer); + else + return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), readShardBalancer); + case ACTIVEREGIONSHARD: + if(intervals == null) + return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new ActiveRegionShardBalancer()); + else + return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer()); + default: + throw new UserException.CommandLineException("Invalid active region shard type."); + } + } else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) { // Apply special validation to read pair walkers. if(walker instanceof ReadPairWalker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index d9c7c9008..beaeacc85 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.samples.PedigreeValidationType; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; @@ -448,5 +449,14 @@ public class GATKArgumentCollection { @Hidden public boolean generateShadowBCF = false; // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed + + // -------------------------------------------------------------------------------------------------------------- + // + // Experimental Active Region Traversal modes + // + // -------------------------------------------------------------------------------------------------------------- + + @Argument(fullName = "active_region_traversal_shard_type", shortName = "active_region_traversal_shard_type", doc = "Choose an experimental shard type for active region traversal, instead of the default LocusShard", required = false) + public ExperimentalActiveRegionShardType activeRegionShardType = ExperimentalActiveRegionShardType.LOCUSSHARD; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ActiveRegionShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ActiveRegionShardDataProvider.java new file mode 100644 index 000000000..55e51f934 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ActiveRegionShardDataProvider.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.providers; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.Collection; + +/** + * @author Joel Thibault + */ +public class ActiveRegionShardDataProvider extends ShardDataProvider { + final private ReadShardDataProvider readProvider; + final private LocusShardDataProvider locusProvider; + + public ActiveRegionShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, StingSAMIterator reads, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { + super(shard, genomeLocParser, reference, rods); // TODO: necessary? + readProvider = new ReadShardDataProvider(shard, genomeLocParser, reads, reference, rods); + locusProvider = new LocusShardDataProvider(shard, sourceInfo, genomeLocParser, locus, locusIterator, reference, rods); + } + + public ReadShardDataProvider getReadShardDataProvider() { + return readProvider; + } + + public LocusShardDataProvider getLocusShardDataProvider(LocusIterator iterator) { + return locusProvider; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 55304da34..1607469eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -22,11 +22,6 @@ public class LocusShardDataProvider extends ShardDataProvider { */ private final ReadProperties sourceInfo; - /** - * The parser, used to create and build new GenomeLocs. - */ - private final GenomeLocParser genomeLocParser; - /** * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs(). */ @@ -45,7 +40,22 @@ public class LocusShardDataProvider extends ShardDataProvider { public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { super(shard,genomeLocParser,reference,rods); this.sourceInfo = sourceInfo; - this.genomeLocParser = genomeLocParser; + this.locus = locus; + this.locusIterator = locusIterator; + } + + /** + * Create a data provider based on an input provider + * Used only by ExperimentalReadShardTraverseActiveRegions + * @param dataProvider + * @param sourceInfo + * @param genomeLocParser + * @param locus + * @param locusIterator + */ + public LocusShardDataProvider(ShardDataProvider dataProvider, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator) { + super(dataProvider.getShard(),genomeLocParser,dataProvider.getReference(),dataProvider.getReferenceOrderedData()); + this.sourceInfo = sourceInfo; this.locus = locus; this.locusIterator = locusIterator; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShard.java new file mode 100755 index 000000000..381b193e9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShard.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.SAMFileSpan; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.List; +import java.util.Map; + +/** + * @author Joel Thibault + */ +public class ActiveRegionShard extends ReadShard { + public ActiveRegionShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map fileSpans, List loci, boolean isUnmapped) { + super(parser, readsDataSource, fileSpans, loci, isUnmapped); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java new file mode 100644 index 000000000..338dd1bdf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads; + +/** + * @author Joel Thibault + */ +public class ActiveRegionShardBalancer extends ReadShardBalancer { + // TODO ? +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java index e22a7a54d..314156af6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java @@ -40,7 +40,9 @@ import java.util.Map; */ public abstract class Shard implements HasGenomeLocation { public enum ShardType { - READ, LOCUS + READ, + LOCUS, + ACTIVEREGION // Used only by ExperimentalActiveRegionShardTraverseActiveRegions } protected final GenomeLocParser parser; // incredibly annoying! diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index f3c1ae91c..44f9978a6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.executive; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; @@ -11,6 +12,8 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; +import org.broadinstitute.sting.gatk.traversals.ExperimentalActiveRegionShardTraverseActiveRegions; +import org.broadinstitute.sting.gatk.traversals.ExperimentalReadShardTraverseActiveRegions; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -78,6 +81,18 @@ public class LinearMicroScheduler extends MicroScheduler { } windowMaker.close(); } + else if(shard.getShardType() == Shard.ShardType.ACTIVEREGION) { + WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), + getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine)); + for(WindowMaker.WindowMakerIterator iterator: windowMaker) { + ShardDataProvider dataProvider = new ActiveRegionShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),getReadIterator(shard),iterator.getLocus(),iterator,reference,rods); + Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); + accumulator.accumulate(dataProvider,result); + dataProvider.close(); + if ( walker.isDone() ) break; + } + windowMaker.close(); + } else { ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); @@ -93,6 +108,14 @@ public class LinearMicroScheduler extends MicroScheduler { final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit()); accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator } + else if( traversalEngine instanceof ExperimentalReadShardTraverseActiveRegions ) { + final Object result = ((ExperimentalReadShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit()); + accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator + } + else if( traversalEngine instanceof ExperimentalActiveRegionShardTraverseActiveRegions) { + final Object result = ((ExperimentalActiveRegionShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit()); + accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator + } Object result = accumulator.finishTraversal(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index f8aec1489..13c11def6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -41,6 +41,7 @@ import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.AutoFormattingTime; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; @@ -245,7 +246,12 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { } else if (walker instanceof ReadPairWalker) { return new TraverseReadPairs(); } else if (walker instanceof ActiveRegionWalker) { - return new TraverseActiveRegions(); + switch (engine.getArguments().activeRegionShardType) { + case LOCUSSHARD: return new TraverseActiveRegions(); + case READSHARD: return new ExperimentalReadShardTraverseActiveRegions(); + case ACTIVEREGIONSHARD: return new ExperimentalActiveRegionShardTraverseActiveRegions(); + default: throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type of ActiveRegionWalker."); + } } else { throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index e69924930..1451b8cde 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -117,7 +117,7 @@ public class GATKReport { * @param numColumns the number of columns in this table */ public void addTable(final String tableName, final String tableDescription, final int numColumns) { - addTable(tableName, tableDescription, numColumns, false, false); + addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT); } /** @@ -126,11 +126,10 @@ public class GATKReport { * @param tableName the name of the table * @param tableDescription the description of the table * @param numColumns the number of columns in this table - * @param sortByRowID whether to sort the rows by the row ID - * @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column + * @param sortingWay way to sort table */ - public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns); + public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) { + GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay); tables.put(tableName, table); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 2bf7c9609..226e50f81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -46,8 +46,7 @@ public class GATKReportTable { private final String tableName; private final String tableDescription; - private final boolean sortByRowID; - private final boolean sortByAllColumns; + private final TableSortingWay sortingWay; private List underlyingData; private final List columnInfo; @@ -73,6 +72,12 @@ public class GATKReportTable { public int index() { return index; } } + public enum TableSortingWay { + SORT_BY_ROW, + SORT_BY_COLUMN, + DO_NOT_SORT + } + protected enum TableNameHeaderFields { NAME(2), DESCRIPTION(3); @@ -107,10 +112,7 @@ public class GATKReportTable { tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay) // when reading from a file, we do not re-sort the rows - sortByRowID = false; - - // when reading from a file, we do not re-sort the rows - sortByAllColumns = false; + sortingWay = TableSortingWay.DO_NOT_SORT; // initialize the data final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]); @@ -181,7 +183,7 @@ public class GATKReportTable { * @param numColumns the number of columns in this table */ public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) { - this(tableName, tableDescription, numColumns, true, false); + this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW); } /** @@ -190,10 +192,9 @@ public class GATKReportTable { * @param tableName the name of the table * @param tableDescription the description of the table * @param numColumns the number of columns in this table - * @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added) - * @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added) + * @param sortingWay in what way to sort rows (instead of the order in which they were added) */ - public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) { + public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) { if ( !isValidName(tableName) ) { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } @@ -204,8 +205,7 @@ public class GATKReportTable { this.tableName = tableName; this.tableDescription = tableDescription; - this.sortByRowID = sortByRowID; - this.sortByAllColumns = sortByAllColumns; + this.sortingWay = sortingWay; underlyingData = new ArrayList(INITITAL_ARRAY_SIZE); columnInfo = new ArrayList(numColumns); @@ -218,7 +218,7 @@ public class GATKReportTable { * @param tableToCopy */ public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) { - this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns); + this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay); for ( final GATKReportColumn column : tableToCopy.getColumnInfo() ) addColumn(column.getColumnName(), column.getFormat()); if ( copyData ) @@ -569,56 +569,53 @@ public class GATKReportTable { out.println(); // write the table body - if ( sortByAllColumns ) { - Collections.sort(underlyingData, new Comparator() { - //INVARIANT the two arrays are of the same length and corresponding elements are of the same type - @Override - public int compare(Object[] objectArr1, Object[] objectArr2) { - final int EQUAL = 0; + switch (sortingWay) { + case SORT_BY_COLUMN: + Collections.sort(underlyingData, new Comparator() { + //INVARIANT the two arrays are of the same length and corresponding elements are of the same type + @Override + public int compare(Object[] objectArr1, Object[] objectArr2) { + final int EQUAL = 0; - int result = EQUAL; + int result = EQUAL; - int l = objectArr1.length; - for (int x = 0; x < l; x++) { - if (objectArr1[x] instanceof Integer) { - result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + int l = objectArr1.length; + for (int x = 0; x < l; x++) { + if (objectArr1[x] instanceof Integer) { + result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + } else if (objectArr1[x] instanceof Double) { + result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); + } else { // default uses String comparison + result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); + } if( result != EQUAL) { return result; } - } else if (objectArr1[x] instanceof Double) { - result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); - if( result != EQUAL) { - return result; - } - } else { // default uses String comparison - result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); - if( result != EQUAL) { - return result; - } } + return result; } - return result; - } - }); - for ( final Object[] row : underlyingData ) - writeRow(out, row); - } else if ( sortByRowID ) { - // make sure that there are exactly the correct number of ID mappings - if ( rowIdToIndex.size() != underlyingData.size() ) - throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently"); + }); + for ( final Object[] row : underlyingData ) + writeRow(out, row); + break; + case SORT_BY_ROW: + // make sure that there are exactly the correct number of ID mappings + if ( rowIdToIndex.size() != underlyingData.size() ) + throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently"); - final TreeMap sortedMap; - try { - sortedMap = new TreeMap(rowIdToIndex); - } catch (ClassCastException e) { - throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types"); - } - for ( final Map.Entry rowKey : sortedMap.entrySet() ) - writeRow(out, underlyingData.get(rowKey.getValue())); - } else { - for ( final Object[] row : underlyingData ) - writeRow(out, row); - } + final TreeMap sortedMap; + try { + sortedMap = new TreeMap(rowIdToIndex); + } catch (ClassCastException e) { + throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types"); + } + for ( final Map.Entry rowKey : sortedMap.entrySet() ) + writeRow(out, underlyingData.get(rowKey.getValue())); + break; + case DO_NOT_SORT: + for ( final Object[] row : underlyingData ) + writeRow(out, row); + } out.println(); } @@ -735,53 +732,47 @@ public class GATKReportTable { } private List getOrderedRows() { - if ( sortByAllColumns ) { - Collections.sort(underlyingData, new Comparator() { - //INVARIANT the two arrays are of the same length and corresponding elements are of the same type - @Override - public int compare(Object[] objectArr1, Object[] objectArr2) { - final int EQUAL = 0; - int result = EQUAL; - - int l = objectArr1.length; - for (int x = 0; x < l; x++) { - if (objectArr1[x] instanceof Integer) { - result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); - if( result != EQUAL) { - return result; + switch (sortingWay) { + case SORT_BY_COLUMN: + Collections.sort(underlyingData, new Comparator() { + //INVARIANT the two arrays are of the same length and corresponding elements are of the same type + @Override + public int compare(Object[] objectArr1, Object[] objectArr2) { + final int EQUAL = 0; + int result = EQUAL; + int l = objectArr1.length; + for (int x = 0; x < l; x++) { + if (objectArr1[x] instanceof Integer) { + result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + } else if (objectArr1[x] instanceof Double) { + result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); + } else { // default uses String comparison + result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); + } + if( result != EQUAL) { + return result; + } } - } else if (objectArr1[x] instanceof Double) { - result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); - if( result != EQUAL) { - return result; - } - } else { // default uses String comparison - result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); - if( result != EQUAL) { - return result; - } - } + return result; } - return result; + }); + return underlyingData; + case SORT_BY_ROW: + final TreeMap sortedMap; + try { + sortedMap = new TreeMap(rowIdToIndex); + } catch (ClassCastException e) { + return underlyingData; } - }); - return underlyingData; - } else if ( !sortByRowID ) { - return underlyingData; + + final List orderedData = new ArrayList(underlyingData.size()); + for ( final int rowKey : sortedMap.values() ) + orderedData.add(underlyingData.get(rowKey)); + + return orderedData; + default: + return underlyingData; } - - final TreeMap sortedMap; - try { - sortedMap = new TreeMap(rowIdToIndex); - } catch (ClassCastException e) { - return underlyingData; - } - - final List orderedData = new ArrayList(underlyingData.size()); - for ( final int rowKey : sortedMap.values() ) - orderedData.add(underlyingData.get(rowKey)); - - return orderedData; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalActiveRegionShardTraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalActiveRegionShardTraverseActiveRegions.java new file mode 100644 index 000000000..45d132678 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalActiveRegionShardTraverseActiveRegions.java @@ -0,0 +1,309 @@ +package org.broadinstitute.sting.gatk.traversals; + +import net.sf.samtools.SAMFileHeader; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.*; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.executive.WindowMaker; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.util.*; + +public class ExperimentalActiveRegionShardTraverseActiveRegions extends TraversalEngine,ActiveRegionShardDataProvider> { + /** + * our log, which we want to capture anything from this class + */ + protected final static Logger logger = Logger.getLogger(TraversalEngine.class); + + private final LinkedList workQueue = new LinkedList(); + private final LinkedList myReads = new LinkedList(); + + @Override + public String getTraversalUnits() { + return "active regions"; + } + + @Override + public T traverse( final ActiveRegionWalker walker, + final ActiveRegionShardDataProvider dataProvider, + T sum) { + logger.debug(String.format("ExperimentalActiveRegionShardTraverseActiveRegions.traverse: Shard is %s", dataProvider)); + + ReadShardDataProvider readDataProvider = dataProvider.getReadShardDataProvider(); + + final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); + final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); + + final ReadView readView = new ReadView(readDataProvider); + + final List activeRegions = new LinkedList(); + ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions()); + + Shard readShard = readDataProvider.getShard(); + SAMFileHeader header = readShard.getReadProperties().getHeader(); + WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(), + readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header)); + + for(WindowMaker.WindowMakerIterator iterator: windowMaker) { + LocusShardDataProvider locusDataProvider = dataProvider.getLocusShardDataProvider(iterator); + final LocusView locusView = new AllLocusView(locusDataProvider); + final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider ); + ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView); + + // We keep processing while the next reference location is within the interval + GenomeLoc prevLoc = null; + while( locusView.hasNext() ) { + final AlignmentContext locus = locusView.next(); + final GenomeLoc location = locus.getLocation(); + + if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) { + // we've move across some interval boundary, restart profile + profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); + } + + readDataProvider.getShard().getReadMetrics().incrementNumIterations(); + + // create reference context. Note that if we have a pileup of "extended events", the context will + // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). + final ReferenceContext refContext = referenceView.getReferenceContext(location); + + // Iterate forward to get all reference ordered data covering this location + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); + + // Call the walkers isActive function for this locus and add them to the list to be integrated later + profile.add(walkerActiveProb(walker, tracker, refContext, locus, location)); + + prevLoc = location; + + printProgress(locus.getLocation()); + } + + locusDataProvider.close(); + } + + windowMaker.close(); + + updateCumulativeMetrics(readDataProvider.getShard()); + + if ( ! profile.isEmpty() ) + incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); + + // add active regions to queue of regions to process + // first check if can merge active regions over shard boundaries + if( !activeRegions.isEmpty() ) { + if( !workQueue.isEmpty() ) { + final ActiveRegion last = workQueue.getLast(); + final ActiveRegion first = activeRegions.get(0); + if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { + workQueue.removeLast(); + activeRegions.remove(first); + workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension)); + } + } + workQueue.addAll( activeRegions ); + } + + logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + + // now process the active regions, where possible + boolean emptyQueue = false; + sum = processActiveRegions(walker, sum, emptyQueue); + + return sum; + } + + /** + * Take the individual isActive calls and integrate them into contiguous active regions and + * add these blocks of work to the work queue + * band-pass filter the list of isActive probabilities and turn into active regions + * + * @param profile + * @param activeRegions + * @param activeRegionExtension + * @param maxRegionSize + * @return + */ + private ActivityProfile incorporateActiveRegions(final ActivityProfile profile, + final List activeRegions, + final int activeRegionExtension, + final int maxRegionSize) { + if ( profile.isEmpty() ) + throw new IllegalStateException("trying to incorporate an empty active profile " + profile); + + final ActivityProfile bandPassFiltered = profile.bandPassFilter(); + activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize )); + return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() ); + } + + + // -------------------------------------------------------------------------------- + // + // simple utility functions + // + // -------------------------------------------------------------------------------- + + private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker walker, + final RefMetaDataTracker tracker, final ReferenceContext refContext, + final AlignmentContext locus, final GenomeLoc location) { + if ( walker.hasPresetActiveRegions() ) { + return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); + } else { + return walker.isActive( tracker, refContext, locus ); + } + } + + private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker walker, + final LocusShardDataProvider dataProvider, + final LocusView locusView) { + if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) + return new ManagingReferenceOrderedView( dataProvider ); + else + return (RodLocusView)locusView; + } + + // -------------------------------------------------------------------------------- + // + // code to handle processing active regions + // + // -------------------------------------------------------------------------------- + + private T processActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { + if( walker.activeRegionOutStream != null ) { + writeActiveRegionsToStream(walker); + return sum; + } else { + return callWalkerMapOnActiveRegions(walker, sum, emptyQueue); + } + } + + /** + * Write out each active region to the walker activeRegionOutStream + * + * @param walker + */ + private void writeActiveRegionsToStream( final ActiveRegionWalker walker ) { + // Just want to output the active regions to a file, not actually process them + for( final ActiveRegion activeRegion : workQueue ) { + if( activeRegion.isActive ) { + walker.activeRegionOutStream.println( activeRegion.getLocation() ); + } + } + } + + private T callWalkerMapOnActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { + final int lastRegionStart = workQueue.getLast().getLocation().getStart(); + final String lastRegionContig = workQueue.getLast().getLocation().getContig(); + + // If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them + // TODO can implement parallel traversal here + while( workQueue.peekFirst() != null ) { + ActiveRegion firstRegion = workQueue.getFirst(); + final String firstRegionContig = firstRegion.getLocation().getContig(); + if (emptyQueue || firstRegionContig != lastRegionContig) { + sum = processFirstActiveRegion(sum, walker); + } + else { + final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop(); + if (lastRegionStart > firstRegionMaxReadStop) { + sum = processFirstActiveRegion( sum, walker ); + } + else { + break; + } + } + } + + return sum; + } + + /** + * Process the first active region and all remaining reads which overlap + * + * Remove the first active region from the queue + * (NB: some reads associated with this active region may have already been processed) + * + * Remove all of these reads from the queue + * (NB: some may be associated with other active regions) + * + * @param sum + * @param walker + * @return + */ + private T processFirstActiveRegion( final T sum, final ActiveRegionWalker walker ) { + final ActiveRegion firstRegion = workQueue.removeFirst(); + + GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here + GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); + + while ( firstRegion.getLocation().overlapsP( firstReadLoc ) || + (walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) { + if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { + // The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region) + long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc ); + ActiveRegion bestRegion = firstRegion; + for( final ActiveRegion otherRegionToTest : workQueue ) { + if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) { + maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc ); + bestRegion = otherRegionToTest; + } + } + bestRegion.add( firstRead ); + + // The read is also added to all other regions in which it overlaps but marked as non-primary + if( walker.wantsNonPrimaryReads() ) { + if( !bestRegion.equals(firstRegion) ) { + firstRegion.add(firstRead); + } + for( final ActiveRegion otherRegionToTest : workQueue ) { + if( !bestRegion.equals(otherRegionToTest) ) { + // check for non-primary vs. extended + if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) { + otherRegionToTest.add( firstRead ); + } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) { + otherRegionToTest.add( firstRead ); + } + } + } + } + + // check for non-primary vs. extended + } else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { + if ( walker.wantsNonPrimaryReads() ) { + firstRegion.add( firstRead ); + } + } else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) { + firstRegion.add( firstRead ); + } + + myReads.removeFirst(); + firstRead = myReads.peekFirst(); + firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); + } + + logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc()); + final M x = walker.map( firstRegion, null ); + return walker.reduce(x, sum); + } + + /** + * Special function called in LinearMicroScheduler to empty out the work queue. + * Ugly for now but will be cleaned up when we push this functionality more into the engine + */ + public T endTraversal( final Walker walker, T sum) { + boolean emptyQueue = true; + return processActiveRegions((ActiveRegionWalker)walker, sum, emptyQueue); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalReadShardTraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalReadShardTraverseActiveRegions.java new file mode 100644 index 000000000..299ee4f56 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalReadShardTraverseActiveRegions.java @@ -0,0 +1,309 @@ +package org.broadinstitute.sting.gatk.traversals; + +import net.sf.samtools.SAMFileHeader; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.*; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.executive.WindowMaker; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.util.*; + +public class ExperimentalReadShardTraverseActiveRegions extends TraversalEngine,ReadShardDataProvider> { + /** + * our log, which we want to capture anything from this class + */ + protected final static Logger logger = Logger.getLogger(TraversalEngine.class); + + private final LinkedList workQueue = new LinkedList(); + private final LinkedList myReads = new LinkedList(); + + @Override + public String getTraversalUnits() { + return "active regions"; + } + + @Override + public T traverse( final ActiveRegionWalker walker, + final ReadShardDataProvider readDataProvider, + T sum) { + logger.debug(String.format("ExperimentalReadShardTraverseActiveRegions.traverse: Read Shard is %s", readDataProvider)); + + final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); + final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); + + final ReadView readView = new ReadView(readDataProvider); + + final List activeRegions = new LinkedList(); + ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions()); + + Shard readShard = readDataProvider.getShard(); + SAMFileHeader header = readShard.getReadProperties().getHeader(); + WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(), + readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header)); + + for(WindowMaker.WindowMakerIterator iterator: windowMaker) { + LocusShardDataProvider locusDataProvider = new LocusShardDataProvider(readDataProvider, + iterator.getSourceInfo(), engine.getGenomeLocParser(), iterator.getLocus(), iterator); + + final LocusView locusView = new AllLocusView(locusDataProvider); + final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider ); + ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView); + + // We keep processing while the next reference location is within the interval + GenomeLoc prevLoc = null; + while( locusView.hasNext() ) { + final AlignmentContext locus = locusView.next(); + final GenomeLoc location = locus.getLocation(); + + if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) { + // we've move across some interval boundary, restart profile + profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); + } + + readDataProvider.getShard().getReadMetrics().incrementNumIterations(); + + // create reference context. Note that if we have a pileup of "extended events", the context will + // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). + final ReferenceContext refContext = referenceView.getReferenceContext(location); + + // Iterate forward to get all reference ordered data covering this location + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); + + // Call the walkers isActive function for this locus and add them to the list to be integrated later + profile.add(walkerActiveProb(walker, tracker, refContext, locus, location)); + + prevLoc = location; + + printProgress(locus.getLocation()); + } + + locusDataProvider.close(); + } + + windowMaker.close(); + + updateCumulativeMetrics(readDataProvider.getShard()); + + if ( ! profile.isEmpty() ) + incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); + + // add active regions to queue of regions to process + // first check if can merge active regions over shard boundaries + if( !activeRegions.isEmpty() ) { + if( !workQueue.isEmpty() ) { + final ActiveRegion last = workQueue.getLast(); + final ActiveRegion first = activeRegions.get(0); + if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { + workQueue.removeLast(); + activeRegions.remove(first); + workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension)); + } + } + workQueue.addAll( activeRegions ); + } + + logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + + // now process the active regions, where possible + boolean emptyQueue = false; + sum = processActiveRegions(walker, sum, emptyQueue); + + return sum; + } + + /** + * Take the individual isActive calls and integrate them into contiguous active regions and + * add these blocks of work to the work queue + * band-pass filter the list of isActive probabilities and turn into active regions + * + * @param profile + * @param activeRegions + * @param activeRegionExtension + * @param maxRegionSize + * @return + */ + private ActivityProfile incorporateActiveRegions(final ActivityProfile profile, + final List activeRegions, + final int activeRegionExtension, + final int maxRegionSize) { + if ( profile.isEmpty() ) + throw new IllegalStateException("trying to incorporate an empty active profile " + profile); + + final ActivityProfile bandPassFiltered = profile.bandPassFilter(); + activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize )); + return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() ); + } + + + // -------------------------------------------------------------------------------- + // + // simple utility functions + // + // -------------------------------------------------------------------------------- + + private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker walker, + final RefMetaDataTracker tracker, final ReferenceContext refContext, + final AlignmentContext locus, final GenomeLoc location) { + if ( walker.hasPresetActiveRegions() ) { + return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); + } else { + return walker.isActive( tracker, refContext, locus ); + } + } + + private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker walker, + final LocusShardDataProvider dataProvider, + final LocusView locusView) { + if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) + return new ManagingReferenceOrderedView( dataProvider ); + else + return (RodLocusView)locusView; + } + + // -------------------------------------------------------------------------------- + // + // code to handle processing active regions + // + // -------------------------------------------------------------------------------- + + private T processActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { + if( walker.activeRegionOutStream != null ) { + writeActiveRegionsToStream(walker); + return sum; + } else { + return callWalkerMapOnActiveRegions(walker, sum, emptyQueue); + } + } + + /** + * Write out each active region to the walker activeRegionOutStream + * + * @param walker + */ + private void writeActiveRegionsToStream( final ActiveRegionWalker walker ) { + // Just want to output the active regions to a file, not actually process them + for( final ActiveRegion activeRegion : workQueue ) { + if( activeRegion.isActive ) { + walker.activeRegionOutStream.println( activeRegion.getLocation() ); + } + } + } + + private T callWalkerMapOnActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { + final int lastRegionStart = workQueue.getLast().getLocation().getStart(); + final String lastRegionContig = workQueue.getLast().getLocation().getContig(); + + // If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them + // TODO can implement parallel traversal here + while( workQueue.peekFirst() != null ) { + ActiveRegion firstRegion = workQueue.getFirst(); + final String firstRegionContig = firstRegion.getLocation().getContig(); + if (emptyQueue || firstRegionContig != lastRegionContig) { + sum = processFirstActiveRegion(sum, walker); + } + else { + final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop(); + if (lastRegionStart > firstRegionMaxReadStop) { + sum = processFirstActiveRegion( sum, walker ); + } + else { + break; + } + } + } + + return sum; + } + + /** + * Process the first active region and all remaining reads which overlap + * + * Remove the first active region from the queue + * (NB: some reads associated with this active region may have already been processed) + * + * Remove all of these reads from the queue + * (NB: some may be associated with other active regions) + * + * @param sum + * @param walker + * @return + */ + private T processFirstActiveRegion( final T sum, final ActiveRegionWalker walker ) { + final ActiveRegion firstRegion = workQueue.removeFirst(); + + GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here + GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); + + while ( firstRegion.getLocation().overlapsP( firstReadLoc ) || + (walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) { + if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { + // The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region) + long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc ); + ActiveRegion bestRegion = firstRegion; + for( final ActiveRegion otherRegionToTest : workQueue ) { + if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) { + maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc ); + bestRegion = otherRegionToTest; + } + } + bestRegion.add( firstRead ); + + // The read is also added to all other regions in which it overlaps but marked as non-primary + if( walker.wantsNonPrimaryReads() ) { + if( !bestRegion.equals(firstRegion) ) { + firstRegion.add(firstRead); + } + for( final ActiveRegion otherRegionToTest : workQueue ) { + if( !bestRegion.equals(otherRegionToTest) ) { + // check for non-primary vs. extended + if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) { + otherRegionToTest.add( firstRead ); + } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) { + otherRegionToTest.add( firstRead ); + } + } + } + } + + // check for non-primary vs. extended + } else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { + if ( walker.wantsNonPrimaryReads() ) { + firstRegion.add( firstRead ); + } + } else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) { + firstRegion.add( firstRead ); + } + + myReads.removeFirst(); + firstRead = myReads.peekFirst(); + firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); + } + + logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc()); + final M x = walker.map( firstRegion, null ); + return walker.reduce(x, sum); + } + + /** + * Special function called in LinearMicroScheduler to empty out the work queue. + * Ugly for now but will be cleaned up when we push this functionality more into the engine + */ + public T endTraversal( final Walker walker, T sum) { + boolean emptyQueue = true; + return processActiveRegions((ActiveRegionWalker)walker, sum, emptyQueue); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 06fc01232..2562fcccd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -43,7 +43,7 @@ public class TraverseActiveRegions extends TraversalEngine walker, final LocusShardDataProvider dataProvider, T sum) { - logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider)); + logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider)); final LocusView locusView = new AllLocusView(dataProvider); @@ -227,7 +227,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine reads, final Queue workQueue, final T sum, final ActiveRegionWalker walker ) { + private T processActiveRegion( final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker walker ) { final ArrayList placedReads = new ArrayList(); - for( final GATKSAMRecord read : reads ) { + for( final GATKSAMRecord read : myReads ) { final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read ); if( activeRegion.getLocation().overlapsP( readLoc ) ) { // The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region) @@ -278,7 +278,7 @@ public class TraverseActiveRegions extends TraversalEngine> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java index ee71d82bb..aa33def62 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java @@ -65,7 +65,8 @@ public class TraverseReadsNano extends TraversalEngine, @Override public void progress(MapData lastProcessedMap) { if ( lastProcessedMap.refContext != null ) - printProgress(lastProcessedMap.refContext.getLocus()); + // note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon + printProgress(lastProcessedMap.refContext.getLocus().getStopLocation()); } }); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 7692c58e2..2c774d94d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -113,25 +113,39 @@ import java.util.List; @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class}) @PartitionBy(PartitionType.READ) public class BaseRecalibrator extends ReadWalker implements NanoSchedulable { + /** + * all the command line arguments for BQSR and it's covariates + */ @ArgumentCollection - private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates + private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + + /** + * When you have nct > 1, BQSR uses nct times more memory to compute its recalibration tables, for efficiency + * purposes. If you have many covariates, and therefore are using a lot of memory, you can use this flag + * to safely access only one table. There may be some CPU cost, but as long as the table is really big + * there should be relatively little CPU costs. + */ + @Argument(fullName = "lowMemoryMode", shortName="lowMemoryMode", doc="Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false) + public boolean lowMemoryMode = false; @Advanced @Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false) public double BAQGOP = BAQ.DEFAULT_GOP; - private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization + /** + * an object that keeps track of the information necessary for quality score quantization + */ + private QuantizationInfo quantizationInfo; - private RecalibrationTables recalibrationTables; - - private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) + /** + * list to hold the all the covariate objects that were requested (required + standard + experimental) + */ + private Covariate[] requestedCovariates; private RecalibrationEngine recalibrationEngine; private int minimumQToUse; - protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ - private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector @@ -143,7 +157,6 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche * Based on the covariates' estimates for initial capacity allocate the data hashmap */ public void initialize() { - baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty // check for unsupported access @@ -185,29 +198,20 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e); } - int numReadGroups = 0; - for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() ) - numReadGroups += header.getReadGroups().size(); - recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG); - - recalibrationEngine = initializeRecalibrationEngine(); - recalibrationEngine.initialize(requestedCovariates, recalibrationTables); - + initializeRecalibrationEngine(); minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN; referenceReader = getToolkit().getReferenceDataSource().getReference(); } - private RecalibrationEngine initializeRecalibrationEngine() { + /** + * Initialize the recalibration engine + */ + private void initializeRecalibrationEngine() { + int numReadGroups = 0; + for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() ) + numReadGroups += header.getReadGroups().size(); - final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class); - try { - final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null); - constructor.setAccessible(true); - return (RecalibrationEngine)constructor.newInstance(); - } - catch (Exception e) { - throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + recalibrationEngineClass.getSimpleName()); - } + recalibrationEngine = new RecalibrationEngine(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG, lowMemoryMode); } private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) { @@ -501,14 +505,18 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche logger.info("Processed: " + result + " reads"); } + private RecalibrationTables getRecalibrationTable() { + return recalibrationEngine.getFinalRecalibrationTables(); + } + private void generatePlots() { File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE; if (recalFile != null) { RecalibrationReport report = new RecalibrationReport(recalFile); - RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates); + RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates); } else - RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates); + RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates); } /** @@ -517,10 +525,10 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche * generate a quantization map (recalibrated_qual -> quantized_qual) */ private void quantizeQualityScores() { - quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS); + quantizationInfo = new QuantizationInfo(getRecalibrationTable(), RAC.QUANTIZING_LEVELS); } private void generateReport() { - RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS); + RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, getRecalibrationTable(), requestedCovariates, RAC.SORT_BY_ALL_COLUMNS); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java index 121e3449b..b884b89db 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java @@ -58,7 +58,8 @@ public final class ReadRecalibrationInfo { if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null"); if ( skips == null ) throw new IllegalArgumentException("skips cannot be null"); if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null"); - // future: may allow insertionErrors && deletionErrors to be null, so don't enforce + if ( insertionErrors == null ) throw new IllegalArgumentException("insertionErrors cannot be null"); + if ( deletionErrors == null ) throw new IllegalArgumentException("deletionErrors cannot be null"); this.read = read; this.baseQuals = read.getBaseQualities(); @@ -73,8 +74,8 @@ public final class ReadRecalibrationInfo { if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + snpErrors.length + " != length " + length); if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length); - if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length); - if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length); + if ( insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length); + if ( deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 2f0f976fa..622413b18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection { public GATKReportTable generateReportTable(final String covariateNames) { GATKReportTable argumentsTable; if(SORT_BY_ALL_COLUMNS) { - argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true); + argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN); } else { argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java index 5c002b7e5..95ccecd6e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -1,35 +1,90 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.walkers.bqsr; import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -/* -* Copyright (c) 2009 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE. -*/ -public interface RecalibrationEngine { +import java.io.PrintStream; +import java.util.LinkedList; +import java.util.List; + +public class RecalibrationEngine { + final protected Covariate[] covariates; + final private int numReadGroups; + final private PrintStream maybeLogStream; + final private boolean lowMemoryMode; + + /** + * Has finalizeData() been called? + */ + private boolean finalized = false; + + /** + * The final (merged, etc) recalibration tables, suitable for downstream analysis. + */ + private RecalibrationTables finalRecalibrationTables = null; + + private final List recalibrationTablesList = new LinkedList(); + + private final ThreadLocal threadLocalTables = new ThreadLocal() { + private synchronized RecalibrationTables makeAndCaptureTable() { + final RecalibrationTables newTable = new RecalibrationTables(covariates, numReadGroups, maybeLogStream); + recalibrationTablesList.add(newTable); + return newTable; + } + + @Override + protected synchronized RecalibrationTables initialValue() { + if ( lowMemoryMode ) { + return recalibrationTablesList.isEmpty() ? makeAndCaptureTable() : recalibrationTablesList.get(0); + } else { + return makeAndCaptureTable(); + } + } + }; + + /** + * Get a recalibration table suitable for updating the underlying RecalDatums + * + * May return a thread-local version, or a single version, depending on the initialization + * arguments of this instance. + * + * @return updated tables + */ + protected RecalibrationTables getUpdatableRecalibrationTables() { + return threadLocalTables.get(); + } + /** * Initialize the recalibration engine * @@ -40,21 +95,167 @@ public interface RecalibrationEngine { * The engine should collect match and mismatch data into the recalibrationTables data. * * @param covariates an array of the covariates we'll be using in this engine, order matters - * @param recalibrationTables the destination recalibrationTables where stats should be collected + * @param numReadGroups the number of read groups we should use for the recalibration tables + * @param maybeLogStream an optional print stream for logging calls to the nestedhashmap in the recalibration tables */ - public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables); + public RecalibrationEngine(final Covariate[] covariates, final int numReadGroups, final PrintStream maybeLogStream, final boolean enableLowMemoryMode) { + if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null"); + if ( numReadGroups < 1 ) throw new IllegalArgumentException("numReadGroups must be >= 1 but got " + numReadGroups); + + this.covariates = covariates.clone(); + this.numReadGroups = numReadGroups; + this.maybeLogStream = maybeLogStream; + this.lowMemoryMode = enableLowMemoryMode; + } /** * Update the recalibration statistics using the information in recalInfo * @param recalInfo data structure holding information about the recalibration values for a single read */ @Requires("recalInfo != null") - public void updateDataForRead(final ReadRecalibrationInfo recalInfo); + public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) { + final GATKSAMRecord read = recalInfo.getRead(); + final ReadCovariates readCovariates = recalInfo.getCovariatesValues(); + final RecalibrationTables tables = getUpdatableRecalibrationTables(); + final NestedIntegerArray qualityScoreTable = tables.getQualityScoreTable(); + + for( int offset = 0; offset < read.getReadBases().length; offset++ ) { + if( ! recalInfo.skip(offset) ) { + + for (final EventType eventType : EventType.values()) { + final int[] keys = readCovariates.getKeySet(offset, eventType); + final int eventIndex = eventType.ordinal(); + final byte qual = recalInfo.getQual(eventType, offset); + final double isError = recalInfo.getErrorFraction(eventType, offset); + + incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + + incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex); + } + } + } + } + } + + /** + * creates a datum object with one observation and one or zero error + * + * @param reportedQual the quality score reported by the instrument for this base + * @param isError whether or not the observation is an error + * @return a new RecalDatum object with the observation and the error + */ + protected RecalDatum createDatumObject(final byte reportedQual, final double isError) { + return new RecalDatum(1, isError, reportedQual); + } /** * Finalize, if appropriate, all derived data in recalibrationTables. * * Called once after all calls to updateDataForRead have been issued. + * + * Assumes that all of the principal tables (by quality score) have been completely updated, + * and walks over this data to create summary data tables like by read group table. */ - public void finalizeData(); + public void finalizeData() { + if ( finalized ) throw new IllegalStateException("FinalizeData() has already been called"); + + // merge all of the thread-local tables + finalRecalibrationTables = mergeThreadLocalRecalibrationTables(); + + final NestedIntegerArray byReadGroupTable = finalRecalibrationTables.getReadGroupTable(); + final NestedIntegerArray byQualTable = finalRecalibrationTables.getQualityScoreTable(); + + // iterate over all values in the qual table + for ( NestedIntegerArray.Leaf leaf : byQualTable.getAllLeaves() ) { + final int rgKey = leaf.keys[0]; + final int eventIndex = leaf.keys[2]; + final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex); + final RecalDatum qualDatum = leaf.value; + + if ( rgDatum == null ) { + // create a copy of qualDatum, and initialize byReadGroup table with it + byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex); + } else { + // combine the qual datum with the existing datum in the byReadGroup table + rgDatum.combine(qualDatum); + } + } + + finalized = true; + } + + /** + * Merge all of the thread local recalibration tables into a single one. + * + * Reuses one of the recalibration tables to hold the merged table, so this function can only be + * called once in the engine. + * + * @return the merged recalibration table + */ + @Requires("! finalized") + private RecalibrationTables mergeThreadLocalRecalibrationTables() { + if ( recalibrationTablesList.isEmpty() ) throw new IllegalStateException("recalibration tables list is empty"); + + RecalibrationTables merged = null; + for ( final RecalibrationTables table : recalibrationTablesList ) { + if ( merged == null ) + // fast path -- if there's only only one table, so just make it the merged one + merged = table; + else { + merged.combine(table); + } + } + + return merged; + } + + /** + * Get the final recalibration tables, after finalizeData() has been called + * + * This returns the finalized recalibration table collected by this engine. + * + * It is an error to call this function before finalizeData has been called + * + * @return the finalized recalibration table collected by this engine + */ + public RecalibrationTables getFinalRecalibrationTables() { + if ( ! finalized ) throw new IllegalStateException("Cannot get final recalibration tables until finalizeData() has been called"); + return finalRecalibrationTables; + } + + /** + * Increments the RecalDatum at the specified position in the specified table, or put a new item there + * if there isn't already one. + * + * Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put() + * to return false if another thread inserts a new item at our position in the middle of our put operation. + * + * @param table the table that holds/will hold our item + * @param qual qual for this event + * @param isError error value for this event + * @param keys location in table of our item + */ + protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray table, + final byte qual, + final double isError, + final int... keys ) { + final RecalDatum existingDatum = table.get(keys); + + if ( existingDatum == null ) { + // No existing item, try to put a new one + if ( ! table.put(createDatumObject(qual, isError), keys) ) { + // Failed to put a new item because another thread came along and put an item here first. + // Get the newly-put item and increment it (item is guaranteed to exist at this point) + table.get(keys).increment(1L, isError); + } + } + else { + // Easy case: already an item here, so increment it + existingDatum.increment(1L, isError); + } + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java deleted file mode 100644 index 4ad8ccdf3..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java +++ /dev/null @@ -1,144 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -import org.broadinstitute.sting.utils.classloader.PublicPackageSource; -import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.sting.utils.recalibration.EventType; -import org.broadinstitute.sting.utils.recalibration.ReadCovariates; -import org.broadinstitute.sting.utils.recalibration.RecalDatum; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; -import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource { - protected Covariate[] covariates; - protected RecalibrationTables recalibrationTables; - - @Override - public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { - if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null"); - if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null"); - - this.covariates = covariates.clone(); - this.recalibrationTables = recalibrationTables; - } - - @Override - public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) { - final GATKSAMRecord read = recalInfo.getRead(); - final EventType eventType = EventType.BASE_SUBSTITUTION; - final ReadCovariates readCovariates = recalInfo.getCovariatesValues(); - - for( int offset = 0; offset < read.getReadBases().length; offset++ ) { - if( ! recalInfo.skip(offset) ) { - final byte qual = recalInfo.getQual(eventType, offset); - final double isError = recalInfo.getErrorFraction(eventType, offset); - final int[] keys = readCovariates.getKeySet(offset, eventType); - - incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index); - - for (int i = 2; i < covariates.length; i++) { - if (keys[i] < 0) - continue; - - incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index); - } - } - } - } - - /** - * creates a datum object with one observation and one or zero error - * - * @param reportedQual the quality score reported by the instrument for this base - * @param isError whether or not the observation is an error - * @return a new RecalDatum object with the observation and the error - */ - protected RecalDatum createDatumObject(final byte reportedQual, final double isError) { - return new RecalDatum(1L, isError, reportedQual); - } - - /** - * Create derived recalibration data tables - * - * Assumes that all of the principal tables (by quality score) have been completely updated, - * and walks over this data to create summary data tables like by read group table. - */ - @Override - public void finalizeData() { - final NestedIntegerArray byReadGroupTable = recalibrationTables.getReadGroupTable(); - final NestedIntegerArray byQualTable = recalibrationTables.getQualityScoreTable(); - - // iterate over all values in the qual table - for ( NestedIntegerArray.Leaf leaf : byQualTable.getAllLeaves() ) { - final int rgKey = leaf.keys[0]; - final int eventIndex = leaf.keys[2]; - final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex); - final RecalDatum qualDatum = leaf.value; - - if ( rgDatum == null ) { - // create a copy of qualDatum, and initialize byReadGroup table with it - byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex); - } else { - // combine the qual datum with the existing datum in the byReadGroup table - rgDatum.combine(qualDatum); - } - } - } - - /** - * Increments the RecalDatum at the specified position in the specified table, or put a new item there - * if there isn't already one. - * - * Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put() - * to return false if another thread inserts a new item at our position in the middle of our put operation. - * - * @param table the table that holds/will hold our item - * @param qual qual for this event - * @param isError error value for this event - * @param keys location in table of our item - */ - protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray table, - final byte qual, - final double isError, - final int... keys ) { - final RecalDatum existingDatum = table.get(keys); - - if ( existingDatum == null ) { - // No existing item, try to put a new one - if ( ! table.put(createDatumObject(qual, isError), keys) ) { - // Failed to put a new item because another thread came along and put an item here first. - // Get the newly-put item and increment it (item is guaranteed to exist at this point) - table.get(keys).increment(1, isError); - } - } - else { - // Easy case: already an item here, so increment it - existingDatum.increment(1, isError); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java index b4e781e91..98e581e21 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker { public void initialize() { report = new GATKReport(); - report.addTable(reportName, reportDescription, 6, true, false); + report.addTable(reportName, reportDescription, 6); table = report.getTable(reportName); table.addColumn("readgroup"); table.addColumn("cycle"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java index 6af70811f..91efd1ffd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -162,7 +162,7 @@ public class VariantEvalReportWriter { // create the table final String tableName = ve.getSimpleName(); final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description(); - report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false); + report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size())); // grab the table, and add the columns we need to it final GATKReportTable table = report.getTable(tableName); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index c12dfcee9..d1199ad3d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -26,6 +26,11 @@ public class ActiveRegion implements HasGenomeLocation { private final GenomeLocParser genomeLocParser; public final boolean isActive; + // maximum stop position of all reads with start position in this active region + // Used only by ExperimentalReadShardTraverseActiveRegions + // NB: these reads may not be associated with this active region! + private int maxReadStop; + public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) { this.activeRegionLoc = activeRegionLoc; this.isActive = isActive; @@ -33,6 +38,7 @@ public class ActiveRegion implements HasGenomeLocation { this.extension = extension; extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension); fullExtentReferenceLoc = extendedLoc; + maxReadStop = activeRegionLoc.getStart(); } @Override @@ -93,6 +99,18 @@ public class ActiveRegion implements HasGenomeLocation { public void remove( final GATKSAMRecord read ) { reads.remove( read ); } public void removeAll( final ArrayList readsToRemove ) { reads.removeAll( readsToRemove ); } + public void setMaxReadStop(int maxReadStop) { + this.maxReadStop = maxReadStop; + } + + public int getMaxReadStop() { + return maxReadStop; + } + + public int getExtendedMaxReadStop() { + return maxReadStop + extension; + } + public boolean equalExceptReads(final ActiveRegion other) { if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false; if ( isActive != other.isActive ) return false; diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ExperimentalActiveRegionShardType.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ExperimentalActiveRegionShardType.java new file mode 100644 index 000000000..1e9a0ee94 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ExperimentalActiveRegionShardType.java @@ -0,0 +1,14 @@ +package org.broadinstitute.sting.utils.activeregion; + +/** + * Created with IntelliJ IDEA. + * User: thibault + * Date: 1/2/13 + * Time: 4:59 PM + * To change this template use File | Settings | File Templates. + */ +public enum ExperimentalActiveRegionShardType { + LOCUSSHARD, // default/legacy type + READSHARD, + ACTIVEREGIONSHARD +} diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java index 161335957..b36326722 100755 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java @@ -160,6 +160,13 @@ public class ProgressMeter { public ProgressMeter(final File performanceLogFile, final String processingUnitName, final GenomeLocSortedSet processingIntervals) { + this(performanceLogFile, processingUnitName, processingIntervals, ProgressMeterDaemon.DEFAULT_POLL_FREQUENCY_MILLISECONDS); + } + + protected ProgressMeter(final File performanceLogFile, + final String processingUnitName, + final GenomeLocSortedSet processingIntervals, + final long pollingFrequency) { if ( processingUnitName == null ) throw new IllegalArgumentException("processingUnitName cannot be null"); if ( processingIntervals == null ) throw new IllegalArgumentException("Target intervals cannot be null"); @@ -184,10 +191,14 @@ public class ProgressMeter { targetSizeInBP = processingIntervals.coveredSize(); // start up the timer - progressMeterDaemon = new ProgressMeterDaemon(this); + progressMeterDaemon = new ProgressMeterDaemon(this, pollingFrequency); start(); } + public ProgressMeterDaemon getProgressMeterDaemon() { + return progressMeterDaemon; + } + /** * Start up the progress meter, printing initialization message and starting up the * daemon thread for periodic printing. @@ -223,11 +234,13 @@ public class ProgressMeter { * the progress itself. A separate printing daemon periodically polls the meter to print out * progress * - * @param loc Current location, can be null if you are at the end of the processing unit + * @param loc Current location, can be null if you are at the end of the processing unit. Must + * have size == 1 (cannot be multiple bases in size). * @param nTotalRecordsProcessed the total number of records we've processed */ public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) { if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0"); + if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc); // weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup) this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc)); diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java index 16887400a..7edd8e724 100644 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java @@ -8,10 +8,20 @@ package org.broadinstitute.sting.utils.progressmeter; * Time: 9:16 PM */ public final class ProgressMeterDaemon extends Thread { + public final static long DEFAULT_POLL_FREQUENCY_MILLISECONDS = 10 * 1000; + /** * How frequently should we poll and print progress? */ - private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000; + private final long pollFrequencyMilliseconds; + + /** + * How long are we waiting between print progress calls are issued? + * @return the time in milliseconds between progress meter calls + */ + private long getPollFrequencyMilliseconds() { + return pollFrequencyMilliseconds; + } /** * Are we to continue periodically printing status, or should we shut down? @@ -27,13 +37,20 @@ public final class ProgressMeterDaemon extends Thread { * Create a new ProgressMeterDaemon printing progress for meter * @param meter the progress meter to print progress of */ - public ProgressMeterDaemon(final ProgressMeter meter) { + public ProgressMeterDaemon(final ProgressMeter meter, final long pollFrequencyMilliseconds) { if ( meter == null ) throw new IllegalArgumentException("meter cannot be null"); + if ( pollFrequencyMilliseconds <= 0 ) throw new IllegalArgumentException("pollFrequencyMilliseconds must be greater than 0 but got " + pollFrequencyMilliseconds); + this.meter = meter; + this.pollFrequencyMilliseconds = pollFrequencyMilliseconds; setDaemon(true); setName("ProgressMeterDaemon"); } + public ProgressMeterDaemon(final ProgressMeter meter) { + this(meter, DEFAULT_POLL_FREQUENCY_MILLISECONDS); + } + /** * Tells this daemon thread to shutdown at the next opportunity, as the progress * metering is complete. @@ -42,6 +59,14 @@ public final class ProgressMeterDaemon extends Thread { this.done = true; } + /** + * Is this daemon thread done? + * @return true if done, false otherwise + */ + public boolean isDone() { + return done; + } + /** * Start up the ProgressMeterDaemon, polling every tens of seconds to print, if * necessary, the provided progress meter. Never exits until the JVM is complete, @@ -51,7 +76,7 @@ public final class ProgressMeterDaemon extends Thread { while (! done) { meter.printProgress(false); try { - Thread.sleep(POLL_FREQUENCY_MILLISECONDS); + Thread.sleep(getPollFrequencyMilliseconds()); } catch (InterruptedException e) { throw new RuntimeException(e); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 3e0f36799..43c9bd2b5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -198,7 +198,7 @@ public class BaseRecalibration { } private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) { - final Double cached = globalDeltaQs.get(rgKey, errorModel.index); + final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal()); if ( TEST_CACHING ) { final double calcd = calculateGlobalDeltaQ(rgKey, errorModel); @@ -210,7 +210,7 @@ public class BaseRecalibration { } private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) { - final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index); + final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal()); if ( TEST_CACHING ) { final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey); @@ -240,7 +240,7 @@ public class BaseRecalibration { private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) { double result = 0.0; - final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index); + final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal()); if (empiricalQualRG != null) { final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(); @@ -254,7 +254,7 @@ public class BaseRecalibration { private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) { double result = 0.0; - final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index); + final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal()); if (empiricalQualQS != null) { final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(); result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; @@ -287,7 +287,7 @@ public class BaseRecalibration { final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) { - final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index); + final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal()); if (empiricalQualCO != null) { final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java index 1c84518eb..63f873892 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java @@ -1,41 +1,39 @@ package org.broadinstitute.sting.utils.recalibration; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - public enum EventType { - BASE_SUBSTITUTION(0, "M", "Base Substitution"), - BASE_INSERTION(1, "I", "Base Insertion"), - BASE_DELETION(2, "D", "Base Deletion"); + BASE_SUBSTITUTION("M", "Base Substitution"), + BASE_INSERTION("I", "Base Insertion"), + BASE_DELETION("D", "Base Deletion"); - public final int index; private final String representation; private final String longRepresentation; - private EventType(int index, String representation, String longRepresentation) { - this.index = index; + private EventType(String representation, String longRepresentation) { this.representation = representation; this.longRepresentation = longRepresentation; } + /** + * Get the EventType corresponding to its ordinal index + * @param index an ordinal index + * @return the event type corresponding to ordinal index + */ public static EventType eventFrom(int index) { - switch (index) { - case 0: - return BASE_SUBSTITUTION; - case 1: - return BASE_INSERTION; - case 2: - return BASE_DELETION; - default: - throw new ReviewedStingException(String.format("Event %d does not exist.", index)); - } + return EventType.values()[index]; } - - public static EventType eventFrom(String event) { + + /** + * Get the EventType with short string representation + * @throws IllegalArgumentException if representation doesn't correspond to one of EventType + * @param representation short string representation of the event + * @return an EventType + */ + public static EventType eventFrom(String representation) { for (EventType eventType : EventType.values()) - if (eventType.representation.equals(event)) + if (eventType.representation.equals(representation)) return eventType; - throw new ReviewedStingException(String.format("Event %s does not exist.", event)); + throw new IllegalArgumentException(String.format("Event %s does not exist.", representation)); } @Override diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index e0c1261fe..5cf16dc9f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -67,10 +67,10 @@ public class QuantizationInfo { return quantizationLevels; } - public GATKReportTable generateReportTable(boolean sortBycols) { + public GATKReportTable generateReportTable(boolean sortByCols) { GATKReportTable quantizedTable; - if(sortBycols) { - quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true); + if(sortByCols) { + quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN); } else { quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java index 4ddcb2b92..405b2d143 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java @@ -68,9 +68,9 @@ public class ReadCovariates { * @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates */ public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) { - keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch; - keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion; - keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion; + keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch; + keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion; + keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion; } /** @@ -81,11 +81,11 @@ public class ReadCovariates { * @return */ public int[] getKeySet(final int readPosition, final EventType errorModel) { - return keys[errorModel.index][readPosition]; + return keys[errorModel.ordinal()][readPosition]; } public int[][] getKeySet(final EventType errorModel) { - return keys[errorModel.index]; + return keys[errorModel.ordinal()]; } // ---------------------------------------------------------------------- @@ -94,17 +94,9 @@ public class ReadCovariates { // // ---------------------------------------------------------------------- - protected int[][] getMismatchesKeySet() { - return keys[EventType.BASE_SUBSTITUTION.index]; - } - - protected int[][] getInsertionsKeySet() { - return keys[EventType.BASE_INSERTION.index]; - } - - protected int[][] getDeletionsKeySet() { - return keys[EventType.BASE_DELETION.index]; - } + protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); } + protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); } + protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); } protected int[] getMismatchesKeySet(final int readPosition) { return getKeySet(readPosition, EventType.BASE_SUBSTITUTION); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 9a9a5dfc6..cf27df2eb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -287,7 +287,7 @@ public class RecalUtils { final GATKReportTable reportTable; if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) { if(sortByCols) { - reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true); + reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN); } else { reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index ac451221e..3264677b9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -133,7 +133,7 @@ public class RecalibrationReport { tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue); final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); - tempCOVarray[3] = event.index; + tempCOVarray[3] = event.ordinal(); recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray); } @@ -152,7 +152,7 @@ public class RecalibrationReport { final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME); tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual); final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); - tempQUALarray[2] = event.index; + tempQUALarray[2] = event.ordinal(); qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray); } @@ -169,7 +169,7 @@ public class RecalibrationReport { final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempRGarray[0] = requestedCovariates[0].keyFromValue(rg); final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); - tempRGarray[1] = event.index; + tempRGarray[1] = event.ordinal(); rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java index 3f968d7f6..a6b1e13b9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java @@ -123,12 +123,16 @@ public final class RecalibrationTables { } /** - * Merge in the quality score table information from qualityScoreTable into this - * recalibration table's quality score table. - * - * @param qualityScoreTable the quality score table we want to merge in + * Merge all of the tables from toMerge into into this set of tables */ - public void combineQualityScoreTable(final NestedIntegerArray qualityScoreTable) { - RecalUtils.combineTables(getQualityScoreTable(), qualityScoreTable); + public void combine(final RecalibrationTables toMerge) { + if ( numTables() != toMerge.numTables() ) + throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes"); + + for ( int i = 0; i < numTables(); i++ ) { + final NestedIntegerArray myTable = this.getTable(i); + final NestedIntegerArray otherTable = toMerge.getTable(i); + RecalUtils.combineTables(myTable, otherTable); + } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index d20b70b42..0637e9a25 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -32,6 +32,13 @@ import org.testng.annotations.Test; import java.io.File; import java.io.IOException; import java.io.PrintStream; +import java.util.Random; +import java.io.FileInputStream; +import java.io.DataInputStream; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.ArrayList; + public class GATKReportUnitTest extends BaseTest { @Test @@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest { Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'"); } + private GATKReportTable getTableWithRandomValues() { + Random number = new Random(123L); + final int VALUESRANGE = 10; + + GATKReport report = GATKReport.newSimpleReport("TableName", "col1", "col2", "col3"); + GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN ); + + final int NUMROWS = 100; + for (int x = 0; x < NUMROWS; x++) { + report.addRow(number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE)); + } + return table; + } + + @Test(enabled = true) + public void testSortingByColumn() { + Assert.assertEquals(isSorted(getTableWithRandomValues()), true); + } + + private boolean isSorted(GATKReportTable table) { + boolean result = true; + File testingSortingTableFile = new File("testSortingFile.txt"); + + try { + // Connect print stream to the output stream + PrintStream ps = new PrintStream(testingSortingTableFile); + table.write(ps); + ps.close(); + } + catch (Exception e){ + System.err.println ("Error: " + e.getMessage()); + } + + ArrayList rows = new ArrayList(); + try { + // Open the file + FileInputStream fStream = new FileInputStream(testingSortingTableFile); + // Get the object of DataInputStream + DataInputStream in = new DataInputStream(fStream); + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + String strLine; + //Read File Line By Line + while ((strLine = br.readLine()) != null) { + + String[] parts = strLine.split(" "); + int l = parts.length; + int[] row = new int[l]; + for(int n = 0; n < l; n++) { + row[n] = Integer.parseInt(parts[n]); + } + rows.add(row); + } + //Close the input stream + in.close(); + } catch (Exception e){//Catch exception if any + System.err.println("Error: " + e.getMessage()); + } + for (int x = 1; x < rows.size() && result; x++) { + result = checkRowOrder(rows.get(x - 1), rows.get(x)); + } + return result; + } + + private boolean checkRowOrder(int[] row1, int[] row2) { + int l = row1.length; + final int EQUAL = 0; + + int result = EQUAL; + + for(int x = 0; x < l && ( result <= EQUAL); x++) { + result = ((Integer)row1[x]).compareTo(row2[x]); + } + if (result <= EQUAL) { + return true; + } else { + return false; + } + } + private GATKReportTable makeBasicTable() { GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value"); GATKReportTable table = report.getTable("TableName"); @@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest { table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "TrueFalse", true); - report1.addTable("Table3", "blah", 1, true, false); + report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW); report1.getTable("Table3").addColumn("a"); report1.getTable("Table3").addRowIDMapping("q", 2); report1.getTable("Table3").addRowIDMapping("5", 3); diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index 4cda1455e..0ec4f57f6 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -3,10 +3,16 @@ package org.broadinstitute.sting.gatk.traversals; import com.google.java.contract.PreconditionError; import net.sf.samtools.*; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; +import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -15,7 +21,6 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.executive.WindowMaker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -28,6 +33,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; +import org.testng.TestException; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -95,7 +101,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { } } - private final TraverseActiveRegions t = new TraverseActiveRegions(); + private final TraverseActiveRegions traverse = new TraverseActiveRegions(); + private final ExperimentalReadShardTraverseActiveRegions readShardTraverse = new ExperimentalReadShardTraverseActiveRegions(); + private final ExperimentalActiveRegionShardTraverseActiveRegions activeRegionShardTraverse = new ExperimentalActiveRegionShardTraverseActiveRegions(); private IndexedFastaSequenceFile reference; private SAMSequenceDictionary dictionary; @@ -106,21 +114,22 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { private static final String testBAM = "TraverseActiveRegionsUnitTest.bam"; private static final String testBAI = "TraverseActiveRegionsUnitTest.bai"; + private static final ExperimentalActiveRegionShardType shardType = ExperimentalActiveRegionShardType.LOCUSSHARD; + @BeforeClass private void init() throws FileNotFoundException { reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference)); dictionary = reference.getSequenceDictionary(); genomeLocParser = new GenomeLocParser(dictionary); - // TODO: test shard boundaries // TODO: reads with indels // TODO: reads which span many regions // TODO: reads which are partially between intervals (in/outside extension) // TODO: duplicate reads - + // TODO: read at the end of a contig // TODO: reads which are completely outside intervals but within extension // TODO: test the extension itself - + // TODO: unmapped reads intervals = new ArrayList(); intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); @@ -142,6 +151,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050)); reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000)); + reads.add(buildSAMRecord("shard_boundary_1_pre", "1", 16300, 16385)); + reads.add(buildSAMRecord("shard_boundary_1_post", "1", 16384, 16400)); + reads.add(buildSAMRecord("shard_boundary_equal", "1", 16355, 16414)); reads.add(buildSAMRecord("simple20", "20", 10025, 10075)); createBAM(reads); @@ -153,7 +165,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { File indexFile = new File(testBAI); indexFile.deleteOnExit(); - SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile); + SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, outFile); for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) { out.addAlignment(read); } @@ -171,8 +183,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { private List getIsActiveIntervals(DummyActiveRegionWalker walker, List intervals) { List activeIntervals = new ArrayList(); - for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) { - t.traverse(walker, dataProvider, 0); + for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) { + traverse(walker, dataProvider, 0); activeIntervals.addAll(walker.isActiveCalls); } @@ -272,6 +284,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none + // shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927 + // shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927 + // shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); @@ -286,6 +301,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); verifyReadMapping(region, "boundary_equal", "boundary_1_post"); + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384)); + verifyReadMapping(region, "shard_boundary_1_pre"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927)); + verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal"); + region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); verifyReadMapping(region, "simple20"); } @@ -309,6 +330,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none + // shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927 + // shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927 + // shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); @@ -323,6 +347,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post"); + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384)); + verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927)); + verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); + region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); verifyReadMapping(region, "simple20"); } @@ -347,6 +377,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none + // shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927 + // shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927 + // shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); @@ -361,6 +394,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post"); + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384)); + verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927)); + verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); + region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); verifyReadMapping(region, "simple20"); } @@ -382,10 +421,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { } private Map getActiveRegions(DummyActiveRegionWalker walker, List intervals) { - for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) - t.traverse(walker, dataProvider, 0); + for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) + traverse(walker, dataProvider, 0); - t.endTraversal(walker, 0); + endTraversal(walker, 0); return walker.mappedActiveRegions; } @@ -429,6 +468,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); header.setSequenceDictionary(dictionary); + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); @@ -445,10 +485,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { return record; } - private List createDataProviders(List intervals, String bamFile) { + private List createDataProviders(List intervals, String bamFile) { GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); engine.setGenomeLocParser(genomeLocParser); - t.initialize(engine); + GATKArgumentCollection arguments = new GATKArgumentCollection(); + arguments.activeRegionShardType = shardType; // make explicit + engine.setArguments(arguments); Collection samFiles = new ArrayList(); SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags()); @@ -456,13 +498,65 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser); - List providers = new ArrayList(); - for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) { - for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) { - providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); - } + List providers = new ArrayList(); + + switch (shardType) { + case LOCUSSHARD: + traverse.initialize(engine); + for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) { + for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) { + providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); + } + } + break; + case READSHARD: + readShardTraverse.initialize(engine); + for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ReadShardBalancer())) { + providers.add(new ReadShardDataProvider(shard, genomeLocParser, shard.iterator(), reference, new ArrayList())); + } + break; + case ACTIVEREGIONSHARD: + activeRegionShardTraverse.initialize(engine); + for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) { + for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) { + providers.add(new ActiveRegionShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, shard.iterator(), window.getLocus(), window, reference, new ArrayList())); + } + } + break; + default: throw new TestException("Invalid shard type"); } return providers; } + + private void traverse(DummyActiveRegionWalker walker, ShardDataProvider dataProvider, int i) { + switch (shardType) { + case LOCUSSHARD: + traverse.traverse(walker, (LocusShardDataProvider) dataProvider, i); + break; + case READSHARD: + readShardTraverse.traverse(walker, (ReadShardDataProvider) dataProvider, i); + break; + case ACTIVEREGIONSHARD: + activeRegionShardTraverse.traverse(walker, (ActiveRegionShardDataProvider) dataProvider, i); + break; + default: throw new TestException("Invalid shard type"); + } + } + + private void endTraversal(DummyActiveRegionWalker walker, int i) { + switch (shardType) { + case LOCUSSHARD: + traverse.endTraversal(walker, i); + break; + case READSHARD: + readShardTraverse.endTraversal(walker, i); + break; + case ACTIVEREGIONSHARD: + activeRegionShardTraverse.endTraversal(walker, i); + break; + default: throw new TestException("Invalid shard type"); + } + } + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfoUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfoUnitTest.java new file mode 100644 index 000000000..08a8f2dc1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfoUnitTest.java @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import net.sf.samtools.SAMUtils; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumMap; +import java.util.List; + +public final class ReadRecalibrationInfoUnitTest extends BaseTest { + @DataProvider(name = "InfoProvider") + public Object[][] createCombineTablesProvider() { + List tests = new ArrayList(); + + for ( final int readLength: Arrays.asList(10, 100, 1000) ) { + for ( final boolean includeIndelErrors : Arrays.asList(true, false) ) { + tests.add(new Object[]{readLength, includeIndelErrors}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "InfoProvider") + public void testReadInfo(final int readLength, final boolean includeIndelErrors) { + final ReadCovariates covariates = new ReadCovariates(readLength, 2); + + final byte[] bases = new byte[readLength]; + final byte[] baseQuals = new byte[readLength]; + final byte[] insertionQuals = new byte[readLength]; + final byte[] deletionQuals = new byte[readLength]; + final boolean[] skips = new boolean[readLength]; + final double[] snpErrors = new double[readLength]; + final double[] insertionErrors = new double[readLength]; + final double[] deletionsErrors = new double[readLength]; + for ( int i = 0; i < readLength; i++ ) { + bases[i] = 'A'; + baseQuals[i] = (byte)(i % SAMUtils.MAX_PHRED_SCORE); + insertionQuals[i] = (byte)((i+1) % SAMUtils.MAX_PHRED_SCORE); + deletionQuals[i] = (byte)((i+2) % SAMUtils.MAX_PHRED_SCORE); + skips[i] = i % 2 == 0; + snpErrors[i] = 1.0 / (i+1); + insertionErrors[i] = 0.5 / (i+1); + deletionsErrors[i] = 0.3 / (i+1); + } + + final EnumMap errors = new EnumMap(EventType.class); + errors.put(EventType.BASE_SUBSTITUTION, snpErrors); + errors.put(EventType.BASE_INSERTION, insertionErrors); + errors.put(EventType.BASE_DELETION, deletionsErrors); + + final EnumMap quals = new EnumMap(EventType.class); + quals.put(EventType.BASE_SUBSTITUTION, baseQuals); + quals.put(EventType.BASE_INSERTION, insertionQuals); + quals.put(EventType.BASE_DELETION, deletionQuals); + + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, baseQuals, readLength + "M"); + if ( includeIndelErrors ) { + read.setBaseQualities(insertionQuals, EventType.BASE_INSERTION); + read.setBaseQualities(deletionQuals, EventType.BASE_DELETION); + } + + final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skips, snpErrors, insertionErrors, deletionsErrors); + + Assert.assertEquals(info.getCovariatesValues(), covariates); + Assert.assertEquals(info.getRead(), read); + + for ( int i = 0; i < readLength; i++ ) { + Assert.assertEquals(info.skip(i), skips[i]); + for ( final EventType et : EventType.values() ) { + Assert.assertEquals(info.getErrorFraction(et, i), errors.get(et)[i]); + final byte expectedQual = et == EventType.BASE_SUBSTITUTION || includeIndelErrors ? quals.get(et)[i]: GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL; + Assert.assertEquals(info.getQual(et, i), expectedQual); + } + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemonUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemonUnitTest.java new file mode 100644 index 000000000..420db683e --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemonUnitTest.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.progressmeter; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * UnitTests for the ProgressMeterDaemon + * + * User: depristo + * Date: 8/24/12 + * Time: 11:25 AM + * To change this template use File | Settings | File Templates. + */ +public class ProgressMeterDaemonUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; + + @BeforeClass + public void init() throws FileNotFoundException { + genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference))); + } + + // capture and count calls to progress + private class TestingProgressMeter extends ProgressMeter { + final List progressCalls = new LinkedList(); + + private TestingProgressMeter(final long poll) { + super(null, "test", new GenomeLocSortedSet(genomeLocParser), poll); + } + + @Override + protected synchronized void printProgress(boolean mustPrint) { + progressCalls.add(System.currentTimeMillis()); + } + } + + @DataProvider(name = "PollingData") + public Object[][] makePollingData() { + List tests = new ArrayList(); + for ( final int ticks : Arrays.asList(1, 5, 10) ) { + for ( final int poll : Arrays.asList(10, 100) ) { + tests.add(new Object[]{poll, ticks}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "PollingData", invocationCount = 10, successPercentage = 90) + public void testMe(final long poll, final int ticks) throws InterruptedException { + final TestingProgressMeter meter = new TestingProgressMeter(poll); + final ProgressMeterDaemon daemon = meter.getProgressMeterDaemon(); + Assert.assertTrue(daemon.isDaemon()); + + Assert.assertFalse(daemon.isDone()); + Thread.sleep(ticks * poll); + Assert.assertFalse(daemon.isDone()); + + daemon.done(); + Assert.assertTrue(daemon.isDone()); + + Assert.assertEquals(meter.progressCalls.size(), ticks, + "Expected " + ticks + " progress calls from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDataUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDataUnitTest.java new file mode 100644 index 000000000..d6ea3b227 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDataUnitTest.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.progressmeter; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.AutoFormattingTime; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * UnitTests for the ProgressMeterData + * + * User: depristo + * Date: 8/24/12 + * Time: 11:25 AM + * To change this template use File | Settings | File Templates. + */ +public class ProgressMeterDataUnitTest extends BaseTest { + @Test + public void testBasic() { + Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getElapsedSeconds(), 1.0); + Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getUnitsProcessed(), 2); + Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getBpProcessed(), 3); + } + + @Test + public void testFraction() { + final double TOL = 1e-3; + Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(10), 0.1, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(10), 0.2, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(100), 0.01, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(100), 0.02, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(0), 1.0, TOL); + } + + @Test + public void testSecondsPerBP() { + final double TOL = 1e-3; + final long M = 1000000; + Assert.assertEquals(new ProgressMeterData(1.0, 1, M).secondsPerMillionBP(), 1.0, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, 1, M/10).secondsPerMillionBP(), 10.0, TOL); + Assert.assertEquals(new ProgressMeterData(2.0, 1, M).secondsPerMillionBP(), 2.0, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, 1, 0).secondsPerMillionBP(), 1e6, TOL); + } + + @Test + public void testSecondsPerElement() { + final double TOL = 1e-3; + final long M = 1000000; + Assert.assertEquals(new ProgressMeterData(1.0, M, 1).secondsPerMillionElements(), 1.0, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, M/10, 1).secondsPerMillionElements(), 10.0, TOL); + Assert.assertEquals(new ProgressMeterData(2.00, M, 1).secondsPerMillionElements(), 2.0, TOL); + Assert.assertEquals(new ProgressMeterData(1.0, 0, 1).secondsPerMillionElements(), 1e6, TOL); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/EventTypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/EventTypeUnitTest.java new file mode 100644 index 000000000..53645e224 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/EventTypeUnitTest.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.*; + +public final class EventTypeUnitTest extends BaseTest { + @Test + public void testEventTypes() { + for ( final EventType et : EventType.values() ) { + Assert.assertNotNull(et.toString()); + Assert.assertNotNull(et.prettyPrint()); + Assert.assertFalse("".equals(et.toString())); + Assert.assertFalse("".equals(et.prettyPrint())); + Assert.assertEquals(EventType.eventFrom(et.ordinal()), et); + Assert.assertEquals(EventType.eventFrom(et.toString()), et); + } + } + + @Test + public void testEventTypesEnumItself() { + final Set shortReps = new HashSet(); + for ( final EventType et : EventType.values() ) { + Assert.assertFalse(shortReps.contains(et.toString()), "Short representative for EventType has duplicates for " + et); + shortReps.add(et.toString()); + } + Assert.assertEquals(shortReps.size(), EventType.values().length, "Short representatives for EventType aren't unique"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testBadString() { + EventType.eventFrom("asdfhalsdjfalkjsdf"); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index 43c9245d7..f95fae464 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -20,9 +20,7 @@ public class RecalibrationReportUnitTest { private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { final Random random = new Random(); final int nObservations = random.nextInt(maxObservations); - int nErrors = random.nextInt(maxErrors); - while ( nErrors > nObservations ) - nErrors = random.nextInt(maxErrors); + final int nErrors = Math.min(random.nextInt(maxErrors), nObservations); final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual); } @@ -92,14 +90,14 @@ public class RecalibrationReportUnitTest { final int[] covariates = rc.getKeySet(offset, errorMode); final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000; - rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index); - qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index); + rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal()); + qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal()); nKeys += 2; for (int j = 0; j < optionalCovariates.size(); j++) { final NestedIntegerArray covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j); final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j]; if ( covValue >= 0 ) { - covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.index); + covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal()); nKeys++; } }