Resolving merge conflicts after Mark's latest push

This commit is contained in:
Eric Banks 2013-01-04 14:46:39 -05:00
commit bce6fce58d
41 changed files with 1896 additions and 516 deletions

View File

@ -1,99 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.LinkedList;
import java.util.List;
/**
 * A multi-threaded variant of {@link StandardRecalibrationEngine} in which each worker
 * thread accumulates quality-score covariate counts into its own private table,
 * avoiding contention on the shared quality-score table during traversal.  The
 * per-thread tables are merged back into the shared tables when finalizeData() runs.
 *
 * NOTE(review): recalibrationTables, covariates and incrementDatumOrPutIfNecessary are
 * inherited from the superclass and not visible in this file -- their exact semantics
 * are assumed from usage here; confirm against StandardRecalibrationEngine.
 */
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
    private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class);

    // Every thread-local quality score table ever created, kept so finalizeData() can
    // merge them all into the shared table.  Concurrent additions are serialized
    // because initialValue() below is synchronized.
    final List<NestedIntegerArray<RecalDatum>> allThreadLocalQualityScoreTables = new LinkedList<NestedIntegerArray<RecalDatum>>();

    // Lazily creates one private quality-score table per worker thread on first access.
    private ThreadLocal<NestedIntegerArray<RecalDatum>> threadLocalQualityScoreTables = new ThreadLocal<NestedIntegerArray<RecalDatum>>() {
        @Override
        protected synchronized NestedIntegerArray<RecalDatum> initialValue() {
            final NestedIntegerArray<RecalDatum> table = recalibrationTables.makeQualityScoreTable();
            allThreadLocalQualityScoreTables.add(table); // remember it for the final merge
            return table;
        }
    };

    /**
     * Update the recalibration counts for every non-skipped base of the given read.
     *
     * Quality-score covariate counts go into this thread's private table; counts for
     * the remaining optional covariates go directly into the shared per-covariate
     * tables obtained from recalibrationTables.getTable(i).
     *
     * @param recalInfo the read together with its precomputed covariate key sets,
     *                  per-event qualities and error fractions
     */
    @Override
    public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
        final GATKSAMRecord read = recalInfo.getRead();
        final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
        final NestedIntegerArray<RecalDatum> qualityScoreTable = getThreadLocalQualityScoreTable();
        for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
            if( ! recalInfo.skip(offset) ) {
                for (final EventType eventType : EventType.values()) {
                    final int[] keys = readCovariates.getKeySet(offset, eventType);
                    final int eventIndex = eventType.index;
                    final byte qual = recalInfo.getQual(eventType, offset);
                    final double isError = recalInfo.getErrorFraction(eventType, offset);
                    // keys[0] and keys[1] are presumably the required (read group /
                    // reported quality) covariate keys -- TODO confirm covariate ordering
                    incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
                    // remaining covariates are optional: a negative key means "no value"
                    for (int i = 2; i < covariates.length; i++) {
                        if (keys[i] < 0)
                            continue;
                        incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
                    }
                }
            }
        }
    }

    /**
     * Get a NestedIntegerArray for a QualityScore table specific to this thread
     * @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate
     */
    private NestedIntegerArray<RecalDatum> getThreadLocalQualityScoreTable() {
        return threadLocalQualityScoreTables.get();
    }

    /**
     * Merge every thread-local quality-score table into the shared table, clear the
     * bookkeeping list, then delegate to the superclass for remaining finalization.
     *
     * NOTE(review): the merge loop is not synchronized -- this assumes all worker
     * threads have finished calling updateDataForRead() before finalizeData() runs;
     * confirm against the scheduler's lifecycle.
     */
    @Override
    public void finalizeData() {
        // merge in all of the thread local tables
        logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables");
        for ( final NestedIntegerArray<RecalDatum> localTable : allThreadLocalQualityScoreTables ) {
            recalibrationTables.combineQualityScoreTable(localTable);
        }
        allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves
        super.finalizeData();
    }
}

View File

@ -570,11 +570,32 @@ public class GenomeAnalysisEngine {
else if(walker instanceof ActiveRegionWalker) {
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
}
switch(argCollection.activeRegionShardType) {
case LOCUSSHARD:
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
case READSHARD:
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
new LegacyReadShardBalancer() :
new ReadShardBalancer();
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(), readShardBalancer);
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), readShardBalancer);
case ACTIVEREGIONSHARD:
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new ActiveRegionShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer());
default:
throw new UserException.CommandLineException("Invalid active region shard type.");
}
}
else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) {
// Apply special validation to read pair walkers.
if(walker instanceof ReadPairWalker) {

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
@ -448,5 +449,14 @@ public class GATKArgumentCollection {
@Hidden
public boolean generateShadowBCF = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
// --------------------------------------------------------------------------------------------------------------
//
// Experimental Active Region Traversal modes
//
// --------------------------------------------------------------------------------------------------------------
@Argument(fullName = "active_region_traversal_shard_type", shortName = "active_region_traversal_shard_type", doc = "Choose an experimental shard type for active region traversal, instead of the default LocusShard", required = false)
public ExperimentalActiveRegionShardType activeRegionShardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
}

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Collection;
/**
 * A ShardDataProvider for the experimental active-region traversal that bundles
 * together a read-based view and a locus-based view of the same shard.
 *
 * @author Joel Thibault
 */
public class ActiveRegionShardDataProvider extends ShardDataProvider {
    final private ReadShardDataProvider readProvider;
    final private LocusShardDataProvider locusProvider;

    /**
     * Build both sub-providers over the same shard.  The superclass is also
     * initialized with the shard/reference/rods (original author questioned whether
     * that is needed -- see TODO below).
     */
    public ActiveRegionShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, StingSAMIterator reads, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
        super(shard, genomeLocParser, reference, rods); // TODO: necessary?
        readProvider = new ReadShardDataProvider(shard, genomeLocParser, reads, reference, rods);
        locusProvider = new LocusShardDataProvider(shard, sourceInfo, genomeLocParser, locus, locusIterator, reference, rods);
    }

    /** @return the read-based view of this shard's data */
    public ReadShardDataProvider getReadShardDataProvider() {
        return readProvider;
    }

    // NOTE(review): the iterator argument is ignored -- the single locusProvider built
    // in the constructor is returned no matter which window iterator the caller passes
    // (the caller in the traversal loop passes a fresh WindowMakerIterator per window).
    // Confirm whether a per-iterator provider was intended here.
    public LocusShardDataProvider getLocusShardDataProvider(LocusIterator iterator) {
        return locusProvider;
    }
}

View File

@ -22,11 +22,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
*/
private final ReadProperties sourceInfo;
/**
* The parser, used to create and build new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
/**
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
*/
@ -45,7 +40,22 @@ public class LocusShardDataProvider extends ShardDataProvider {
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,genomeLocParser,reference,rods);
this.sourceInfo = sourceInfo;
this.genomeLocParser = genomeLocParser;
this.locus = locus;
this.locusIterator = locusIterator;
}
/**
* Create a data provider based on an input provider
* Used only by ExperimentalReadShardTraverseActiveRegions
* @param dataProvider
* @param sourceInfo
* @param genomeLocParser
* @param locus
* @param locusIterator
*/
public LocusShardDataProvider(ShardDataProvider dataProvider, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator) {
super(dataProvider.getShard(),genomeLocParser,dataProvider.getReference(),dataProvider.getReferenceOrderedData());
this.sourceInfo = sourceInfo;
this.locus = locus;
this.locusIterator = locusIterator;
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileSpan;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.List;
import java.util.Map;
/**
 * A shard of read data used by the experimental ACTIVEREGIONSHARD traversal mode.
 *
 * NOTE(review): this class adds no behavior over ReadShard -- it appears to exist only
 * as a distinct type for the active-region shard machinery; confirm whether a
 * getShardType() override (returning ShardType.ACTIVEREGION) is expected elsewhere.
 *
 * @author Joel Thibault
 */
public class ActiveRegionShard extends ReadShard {

    /** Delegates directly to the ReadShard constructor; see ReadShard for parameter semantics. */
    public ActiveRegionShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
        super(parser, readsDataSource, fileSpans, loci, isUnmapped);
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
/**
 * Shard balancer for the experimental ACTIVEREGIONSHARD traversal mode.
 *
 * Currently inherits all of its behavior unchanged from ReadShardBalancer; the
 * original "TODO ?" marker suggests active-region-specific balancing was planned
 * but never written.
 *
 * @author Joel Thibault
 */
public class ActiveRegionShardBalancer extends ReadShardBalancer {
    // TODO ? -- placeholder: no active-region-specific balancing is implemented yet;
    // ReadShardBalancer behavior is used as-is.
}

View File

@ -40,7 +40,9 @@ import java.util.Map;
*/
public abstract class Shard implements HasGenomeLocation {
public enum ShardType {
READ, LOCUS
READ,
LOCUS,
ACTIVEREGION // Used only by ExperimentalActiveRegionShardTraverseActiveRegions
}
protected final GenomeLocParser parser; // incredibly annoying!

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.executive;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
@ -11,6 +12,8 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.traversals.ExperimentalActiveRegionShardTraverseActiveRegions;
import org.broadinstitute.sting.gatk.traversals.ExperimentalReadShardTraverseActiveRegions;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
import org.broadinstitute.sting.gatk.walkers.Walker;
@ -78,6 +81,18 @@ public class LinearMicroScheduler extends MicroScheduler {
}
windowMaker.close();
}
else if(shard.getShardType() == Shard.ShardType.ACTIVEREGION) {
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new ActiveRegionShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),getReadIterator(shard),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
if ( walker.isDone() ) break;
}
windowMaker.close();
}
else {
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
@ -93,6 +108,14 @@ public class LinearMicroScheduler extends MicroScheduler {
final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
else if( traversalEngine instanceof ExperimentalReadShardTraverseActiveRegions ) {
final Object result = ((ExperimentalReadShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
else if( traversalEngine instanceof ExperimentalActiveRegionShardTraverseActiveRegions) {
final Object result = ((ExperimentalActiveRegionShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
Object result = accumulator.finishTraversal();

View File

@ -41,6 +41,7 @@ import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
@ -245,7 +246,12 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
} else if (walker instanceof ReadPairWalker) {
return new TraverseReadPairs();
} else if (walker instanceof ActiveRegionWalker) {
return new TraverseActiveRegions();
switch (engine.getArguments().activeRegionShardType) {
case LOCUSSHARD: return new TraverseActiveRegions();
case READSHARD: return new ExperimentalReadShardTraverseActiveRegions();
case ACTIVEREGIONSHARD: return new ExperimentalActiveRegionShardTraverseActiveRegions();
default: throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type of ActiveRegionWalker.");
}
} else {
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
}

View File

@ -117,7 +117,7 @@ public class GATKReport {
* @param numColumns the number of columns in this table
*/
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
addTable(tableName, tableDescription, numColumns, false, false);
addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT);
}
/**
@ -126,11 +126,10 @@ public class GATKReport {
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort the rows by the row ID
* @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column
* @param sortingWay way to sort table
*/
public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns);
public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay);
tables.put(tableName, table);
}

View File

@ -46,8 +46,7 @@ public class GATKReportTable {
private final String tableName;
private final String tableDescription;
private final boolean sortByRowID;
private final boolean sortByAllColumns;
private final TableSortingWay sortingWay;
private List<Object[]> underlyingData;
private final List<GATKReportColumn> columnInfo;
@ -73,6 +72,12 @@ public class GATKReportTable {
public int index() { return index; }
}
public enum TableSortingWay {
SORT_BY_ROW,
SORT_BY_COLUMN,
DO_NOT_SORT
}
protected enum TableNameHeaderFields {
NAME(2),
DESCRIPTION(3);
@ -107,10 +112,7 @@ public class GATKReportTable {
tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay)
// when reading from a file, we do not re-sort the rows
sortByRowID = false;
// when reading from a file, we do not re-sort the rows
sortByAllColumns = false;
sortingWay = TableSortingWay.DO_NOT_SORT;
// initialize the data
final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]);
@ -181,7 +183,7 @@ public class GATKReportTable {
* @param numColumns the number of columns in this table
*/
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) {
this(tableName, tableDescription, numColumns, true, false);
this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW);
}
/**
@ -190,10 +192,9 @@ public class GATKReportTable {
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added)
* @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added)
* @param sortingWay in what way to sort rows (instead of the order in which they were added)
*/
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) {
if ( !isValidName(tableName) ) {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
@ -204,8 +205,7 @@ public class GATKReportTable {
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByRowID = sortByRowID;
this.sortByAllColumns = sortByAllColumns;
this.sortingWay = sortingWay;
underlyingData = new ArrayList<Object[]>(INITITAL_ARRAY_SIZE);
columnInfo = new ArrayList<GATKReportColumn>(numColumns);
@ -218,7 +218,7 @@ public class GATKReportTable {
* @param tableToCopy
*/
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns);
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay);
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
addColumn(column.getColumnName(), column.getFormat());
if ( copyData )
@ -569,56 +569,53 @@ public class GATKReportTable {
out.println();
// write the table body
if ( sortByAllColumns ) {
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
switch (sortingWay) {
case SORT_BY_COLUMN:
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
}
if( result != EQUAL) {
return result;
}
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
if( result != EQUAL) {
return result;
}
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
if( result != EQUAL) {
return result;
}
}
return result;
}
return result;
}
});
for ( final Object[] row : underlyingData )
writeRow(out, row);
} else if ( sortByRowID ) {
// make sure that there are exactly the correct number of ID mappings
if ( rowIdToIndex.size() != underlyingData.size() )
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
});
for ( final Object[] row : underlyingData )
writeRow(out, row);
break;
case SORT_BY_ROW:
// make sure that there are exactly the correct number of ID mappings
if ( rowIdToIndex.size() != underlyingData.size() )
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
}
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
writeRow(out, underlyingData.get(rowKey.getValue()));
} else {
for ( final Object[] row : underlyingData )
writeRow(out, row);
}
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
}
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
writeRow(out, underlyingData.get(rowKey.getValue()));
break;
case DO_NOT_SORT:
for ( final Object[] row : underlyingData )
writeRow(out, row);
}
out.println();
}
@ -735,53 +732,47 @@ public class GATKReportTable {
}
private List<Object[]> getOrderedRows() {
if ( sortByAllColumns ) {
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
if( result != EQUAL) {
return result;
switch (sortingWay) {
case SORT_BY_COLUMN:
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
}
if( result != EQUAL) {
return result;
}
}
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
if( result != EQUAL) {
return result;
}
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
if( result != EQUAL) {
return result;
}
}
return result;
}
return result;
});
return underlyingData;
case SORT_BY_ROW:
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
return underlyingData;
}
});
return underlyingData;
} else if ( !sortByRowID ) {
return underlyingData;
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
for ( final int rowKey : sortedMap.values() )
orderedData.add(underlyingData.get(rowKey));
return orderedData;
default:
return underlyingData;
}
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
return underlyingData;
}
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
for ( final int rowKey : sortedMap.values() )
orderedData.add(underlyingData.get(rowKey));
return orderedData;
}
}

View File

@ -0,0 +1,309 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
/**
 * Traversal engine that walks an active-region shard, calls the walker's isActive()
 * at each locus, integrates the resulting probabilities into ActiveRegions, assigns
 * reads to their regions, and invokes the walker's map/reduce over completed regions.
 */
public class ExperimentalActiveRegionShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ActiveRegionShardDataProvider> {
    /**
     * our log, which we want to capture anything from this class
     */
    protected final static Logger logger = Logger.getLogger(TraversalEngine.class);

    // Regions created so far but not yet handed to the walker's map() call.
    private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();

    // Reads accumulated during traversal, waiting to be assigned to active region(s).
    // NOTE(review): nothing in this class visibly appends to myReads — presumably a
    // subclass or caller populates it; confirm before relying on it being non-empty.
    private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();

    @Override
    public String getTraversalUnits() {
        return "active regions";
    }

    /**
     * Traverse the shard: compute isActive at every locus, band-pass filter into
     * active regions, merge across shard boundaries when contiguous, then process
     * any regions that have become ready.
     *
     * @param walker       the active-region walker driving this traversal
     * @param dataProvider shard-level provider wrapping both read and locus views
     * @param sum          the walker's running reduce value
     * @return the updated reduce value
     */
    @Override
    public T traverse( final ActiveRegionWalker<M,T> walker,
                       final ActiveRegionShardDataProvider dataProvider,
                       T sum) {
        logger.debug(String.format("ExperimentalActiveRegionShardTraverseActiveRegions.traverse: Shard is %s", dataProvider));

        ReadShardDataProvider readDataProvider = dataProvider.getReadShardDataProvider();

        // Region sizing parameters declared by the walker via its @ActiveRegionExtension annotation.
        final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
        final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();

        final ReadView readView = new ReadView(readDataProvider);
        final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
        ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());

        Shard readShard = readDataProvider.getShard();
        SAMFileHeader header = readShard.getReadProperties().getHeader();
        WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
                readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));

        for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
            LocusShardDataProvider locusDataProvider = dataProvider.getLocusShardDataProvider(iterator);
            final LocusView locusView = new AllLocusView(locusDataProvider);
            final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
            ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);

            // We keep processing while the next reference location is within the interval
            GenomeLoc prevLoc = null;
            while( locusView.hasNext() ) {
                final AlignmentContext locus = locusView.next();
                final GenomeLoc location = locus.getLocation();

                if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
                    // we've moved across some interval boundary, restart profile
                    profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
                }

                readDataProvider.getShard().getReadMetrics().incrementNumIterations();

                // create reference context. Note that if we have a pileup of "extended events", the context will
                // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
                final ReferenceContext refContext = referenceView.getReferenceContext(location);

                // Iterate forward to get all reference ordered data covering this location
                final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);

                // Call the walker's isActive function for this locus and add the result to the
                // profile, to be integrated into regions later
                profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));

                prevLoc = location;
                printProgress(locus.getLocation());
            }
            locusDataProvider.close();
        }
        windowMaker.close();
        updateCumulativeMetrics(readDataProvider.getShard());

        // Flush whatever remains in the profile into active regions.
        if ( ! profile.isEmpty() )
            incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);

        // add active regions to queue of regions to process
        // first check if we can merge active regions over shard boundaries
        if( !activeRegions.isEmpty() ) {
            if( !workQueue.isEmpty() ) {
                final ActiveRegion last = workQueue.getLast();
                final ActiveRegion first = activeRegions.get(0);
                // Merge only when activity state matches, the locations are contiguous,
                // and the merged region would not exceed the walker's size cap.
                if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
                    workQueue.removeLast();
                    activeRegions.remove(first);
                    workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
                }
            }
            workQueue.addAll( activeRegions );
        }

        logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );

        // now process the active regions, where possible
        boolean emptyQueue = false;
        sum = processActiveRegions(walker, sum, emptyQueue);

        return sum;
    }

    /**
     * Take the individual isActive calls and integrate them into contiguous active regions and
     * add these blocks of work to the work queue
     * band-pass filter the list of isActive probabilities and turn into active regions
     *
     * @param profile               the activity profile accumulated so far (must be non-empty)
     * @param activeRegions         output list receiving the newly created regions
     * @param activeRegionExtension number of bases to extend each region by
     * @param maxRegionSize         maximum allowed region size
     * @return a fresh, empty ActivityProfile to continue accumulating into
     */
    private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
                                                     final List<ActiveRegion> activeRegions,
                                                     final int activeRegionExtension,
                                                     final int maxRegionSize) {
        if ( profile.isEmpty() )
            throw new IllegalStateException("trying to incorporate an empty active profile " + profile);

        final ActivityProfile bandPassFiltered = profile.bandPassFilter();
        activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
        return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
    }

    // --------------------------------------------------------------------------------
    //
    // simple utility functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Compute the activity probability at a locus: either derived from the walker's
     * preset regions (1.0 inside, 0.0 outside) or by asking the walker directly.
     */
    private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
                                                         final RefMetaDataTracker tracker, final ReferenceContext refContext,
                                                         final AlignmentContext locus, final GenomeLoc location) {
        if ( walker.hasPresetActiveRegions() ) {
            return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
        } else {
            return walker.isActive( tracker, refContext, locus );
        }
    }

    /**
     * Choose the reference-ordered-data view: a managing view in the common case, or the
     * locus view itself when the walker's data source IS the reference-ordered data.
     */
    private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
                                                          final LocusShardDataProvider dataProvider,
                                                          final LocusView locusView) {
        if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
            return new ManagingReferenceOrderedView( dataProvider );
        else
            return (RodLocusView)locusView;
    }

    // --------------------------------------------------------------------------------
    //
    // code to handle processing active regions
    //
    // --------------------------------------------------------------------------------

    /**
     * Either dump the queued regions to the walker's output stream (when requested)
     * or run the walker's map/reduce over the regions that are ready.
     */
    private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
        if( walker.activeRegionOutStream != null ) {
            writeActiveRegionsToStream(walker);
            return sum;
        } else {
            return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
        }
    }

    /**
     * Write out each active region to the walker activeRegionOutStream
     *
     * @param walker the walker whose activeRegionOutStream receives the locations
     */
    private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
        // Just want to output the active regions to a file, not actually process them
        for( final ActiveRegion activeRegion : workQueue ) {
            if( activeRegion.isActive ) {
                walker.activeRegionOutStream.println( activeRegion.getLocation() );
            }
        }
    }

    /**
     * Drain regions from the head of the work queue that the traversal has moved
     * sufficiently past (or all of them when emptyQueue is set), mapping each.
     *
     * NOTE(review): throws NoSuchElementException if workQueue is empty on entry
     * (getLast()); callers appear to only reach here with a populated queue — confirm.
     */
    private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
        final int lastRegionStart = workQueue.getLast().getLocation().getStart();
        final String lastRegionContig = workQueue.getLast().getLocation().getContig();

        // If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
        // TODO can implement parallel traversal here
        while( workQueue.peekFirst() != null ) {
            ActiveRegion firstRegion = workQueue.getFirst();
            final String firstRegionContig = firstRegion.getLocation().getContig();
            // BUGFIX: contigs are Strings — compare with equals(), not reference identity (!=),
            // which only worked when both names were interned to the same object.
            if (emptyQueue || !firstRegionContig.equals(lastRegionContig)) {
                sum = processFirstActiveRegion(sum, walker);
            }
            else {
                // BUGFIX: the ternary branches were swapped — when the walker wants extended
                // reads, the region's effective end is the *extended* max read stop.
                final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getExtendedMaxReadStop() : firstRegion.getMaxReadStop();
                if (lastRegionStart > firstRegionMaxReadStop) {
                    sum = processFirstActiveRegion( sum, walker );
                }
                else {
                    // Head region may still receive reads; stop draining for now.
                    break;
                }
            }
        }
        return sum;
    }

    /**
     * Process the first active region and all remaining reads which overlap
     *
     * Remove the first active region from the queue
     * (NB: some reads associated with this active region may have already been processed)
     *
     * Remove all of these reads from the queue
     * (NB: some may be associated with other active regions)
     *
     * @param sum    the walker's running reduce value
     * @param walker the walker whose map/reduce is invoked
     * @return the updated reduce value
     */
    private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
        final ActiveRegion firstRegion = workQueue.removeFirst();

        GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
        // NOTE(review): if myReads is (or becomes) empty, firstRead is null and
        // createGenomeLoc(null) will likely NPE — confirm callers guarantee reads remain.
        GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );

        while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
                (walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {

            if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
                // The region with the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
                long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
                ActiveRegion bestRegion = firstRegion;
                for( final ActiveRegion otherRegionToTest : workQueue ) {
                    // >= so that a tie is won by the later (right-most) region
                    if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
                        maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
                        bestRegion = otherRegionToTest;
                    }
                }
                bestRegion.add( firstRead );

                // The read is also added to all other regions in which it overlaps but marked as non-primary
                if( walker.wantsNonPrimaryReads() ) {
                    if( !bestRegion.equals(firstRegion) ) {
                        firstRegion.add(firstRead);
                    }
                    for( final ActiveRegion otherRegionToTest : workQueue ) {
                        if( !bestRegion.equals(otherRegionToTest) ) {
                            // check for non-primary vs. extended
                            if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
                                otherRegionToTest.add( firstRead );
                            } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
                                otherRegionToTest.add( firstRead );
                            }
                        }
                    }
                }
            // check for non-primary vs. extended
            // NOTE(review): this branch repeats the condition of the if above and is therefore
            // unreachable — it was probably meant to test getExtendedLoc(); kept as-is pending review.
            } else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
                if ( walker.wantsNonPrimaryReads() ) {
                    firstRegion.add( firstRead );
                }
            } else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
                firstRegion.add( firstRead );
            }

            myReads.removeFirst();
            firstRead = myReads.peekFirst();
            firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
        }

        logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
        final M x = walker.map( firstRegion, null );
        return walker.reduce(x, sum);
    }

    /**
     * Special function called in LinearMicroScheduler to empty out the work queue.
     * Ugly for now but will be cleaned up when we push this functionality more into the engine
     */
    public T endTraversal( final Walker<M,T> walker, T sum) {
        boolean emptyQueue = true;
        return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
    }
}

View File

@ -0,0 +1,309 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
/**
 * Traversal engine that walks a read shard, calls the walker's isActive() at each
 * locus, integrates the resulting probabilities into ActiveRegions, assigns reads
 * to their regions, and invokes the walker's map/reduce over completed regions.
 */
public class ExperimentalReadShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ReadShardDataProvider> {
    /**
     * our log, which we want to capture anything from this class
     */
    protected final static Logger logger = Logger.getLogger(TraversalEngine.class);

    // Regions created so far but not yet handed to the walker's map() call.
    private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();

    // Reads accumulated during traversal, waiting to be assigned to active region(s).
    // NOTE(review): nothing in this class visibly appends to myReads — presumably a
    // subclass or caller populates it; confirm before relying on it being non-empty.
    private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();

    @Override
    public String getTraversalUnits() {
        return "active regions";
    }

    /**
     * Traverse the read shard: compute isActive at every locus, band-pass filter into
     * active regions, merge across shard boundaries when contiguous, then process
     * any regions that have become ready.
     *
     * @param walker           the active-region walker driving this traversal
     * @param readDataProvider the read shard data provider
     * @param sum              the walker's running reduce value
     * @return the updated reduce value
     */
    @Override
    public T traverse( final ActiveRegionWalker<M,T> walker,
                       final ReadShardDataProvider readDataProvider,
                       T sum) {
        logger.debug(String.format("ExperimentalReadShardTraverseActiveRegions.traverse: Read Shard is %s", readDataProvider));

        // Region sizing parameters declared by the walker via its @ActiveRegionExtension annotation.
        final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
        final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();

        final ReadView readView = new ReadView(readDataProvider);
        final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
        ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());

        Shard readShard = readDataProvider.getShard();
        SAMFileHeader header = readShard.getReadProperties().getHeader();
        WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
                readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));

        for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
            // Unlike the active-region-shard variant, the locus provider is built here
            // directly from the read shard's window.
            LocusShardDataProvider locusDataProvider = new LocusShardDataProvider(readDataProvider,
                    iterator.getSourceInfo(), engine.getGenomeLocParser(), iterator.getLocus(), iterator);
            final LocusView locusView = new AllLocusView(locusDataProvider);
            final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
            ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);

            // We keep processing while the next reference location is within the interval
            GenomeLoc prevLoc = null;
            while( locusView.hasNext() ) {
                final AlignmentContext locus = locusView.next();
                final GenomeLoc location = locus.getLocation();

                if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
                    // we've moved across some interval boundary, restart profile
                    profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
                }

                readDataProvider.getShard().getReadMetrics().incrementNumIterations();

                // create reference context. Note that if we have a pileup of "extended events", the context will
                // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
                final ReferenceContext refContext = referenceView.getReferenceContext(location);

                // Iterate forward to get all reference ordered data covering this location
                final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);

                // Call the walker's isActive function for this locus and add the result to the
                // profile, to be integrated into regions later
                profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));

                prevLoc = location;
                printProgress(locus.getLocation());
            }
            locusDataProvider.close();
        }
        windowMaker.close();
        updateCumulativeMetrics(readDataProvider.getShard());

        // Flush whatever remains in the profile into active regions.
        if ( ! profile.isEmpty() )
            incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);

        // add active regions to queue of regions to process
        // first check if we can merge active regions over shard boundaries
        if( !activeRegions.isEmpty() ) {
            if( !workQueue.isEmpty() ) {
                final ActiveRegion last = workQueue.getLast();
                final ActiveRegion first = activeRegions.get(0);
                // Merge only when activity state matches, the locations are contiguous,
                // and the merged region would not exceed the walker's size cap.
                if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
                    workQueue.removeLast();
                    activeRegions.remove(first);
                    workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
                }
            }
            workQueue.addAll( activeRegions );
        }

        logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );

        // now process the active regions, where possible
        boolean emptyQueue = false;
        sum = processActiveRegions(walker, sum, emptyQueue);

        return sum;
    }

    /**
     * Take the individual isActive calls and integrate them into contiguous active regions and
     * add these blocks of work to the work queue
     * band-pass filter the list of isActive probabilities and turn into active regions
     *
     * @param profile               the activity profile accumulated so far (must be non-empty)
     * @param activeRegions         output list receiving the newly created regions
     * @param activeRegionExtension number of bases to extend each region by
     * @param maxRegionSize         maximum allowed region size
     * @return a fresh, empty ActivityProfile to continue accumulating into
     */
    private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
                                                     final List<ActiveRegion> activeRegions,
                                                     final int activeRegionExtension,
                                                     final int maxRegionSize) {
        if ( profile.isEmpty() )
            throw new IllegalStateException("trying to incorporate an empty active profile " + profile);

        final ActivityProfile bandPassFiltered = profile.bandPassFilter();
        activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
        return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
    }

    // --------------------------------------------------------------------------------
    //
    // simple utility functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Compute the activity probability at a locus: either derived from the walker's
     * preset regions (1.0 inside, 0.0 outside) or by asking the walker directly.
     */
    private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
                                                         final RefMetaDataTracker tracker, final ReferenceContext refContext,
                                                         final AlignmentContext locus, final GenomeLoc location) {
        if ( walker.hasPresetActiveRegions() ) {
            return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
        } else {
            return walker.isActive( tracker, refContext, locus );
        }
    }

    /**
     * Choose the reference-ordered-data view: a managing view in the common case, or the
     * locus view itself when the walker's data source IS the reference-ordered data.
     */
    private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
                                                          final LocusShardDataProvider dataProvider,
                                                          final LocusView locusView) {
        if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
            return new ManagingReferenceOrderedView( dataProvider );
        else
            return (RodLocusView)locusView;
    }

    // --------------------------------------------------------------------------------
    //
    // code to handle processing active regions
    //
    // --------------------------------------------------------------------------------

    /**
     * Either dump the queued regions to the walker's output stream (when requested)
     * or run the walker's map/reduce over the regions that are ready.
     */
    private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
        if( walker.activeRegionOutStream != null ) {
            writeActiveRegionsToStream(walker);
            return sum;
        } else {
            return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
        }
    }

    /**
     * Write out each active region to the walker activeRegionOutStream
     *
     * @param walker the walker whose activeRegionOutStream receives the locations
     */
    private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
        // Just want to output the active regions to a file, not actually process them
        for( final ActiveRegion activeRegion : workQueue ) {
            if( activeRegion.isActive ) {
                walker.activeRegionOutStream.println( activeRegion.getLocation() );
            }
        }
    }

    /**
     * Drain regions from the head of the work queue that the traversal has moved
     * sufficiently past (or all of them when emptyQueue is set), mapping each.
     *
     * NOTE(review): throws NoSuchElementException if workQueue is empty on entry
     * (getLast()); callers appear to only reach here with a populated queue — confirm.
     */
    private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
        final int lastRegionStart = workQueue.getLast().getLocation().getStart();
        final String lastRegionContig = workQueue.getLast().getLocation().getContig();

        // If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
        // TODO can implement parallel traversal here
        while( workQueue.peekFirst() != null ) {
            ActiveRegion firstRegion = workQueue.getFirst();
            final String firstRegionContig = firstRegion.getLocation().getContig();
            // BUGFIX: contigs are Strings — compare with equals(), not reference identity (!=),
            // which only worked when both names were interned to the same object.
            if (emptyQueue || !firstRegionContig.equals(lastRegionContig)) {
                sum = processFirstActiveRegion(sum, walker);
            }
            else {
                // BUGFIX: the ternary branches were swapped — when the walker wants extended
                // reads, the region's effective end is the *extended* max read stop.
                final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getExtendedMaxReadStop() : firstRegion.getMaxReadStop();
                if (lastRegionStart > firstRegionMaxReadStop) {
                    sum = processFirstActiveRegion( sum, walker );
                }
                else {
                    // Head region may still receive reads; stop draining for now.
                    break;
                }
            }
        }
        return sum;
    }

    /**
     * Process the first active region and all remaining reads which overlap
     *
     * Remove the first active region from the queue
     * (NB: some reads associated with this active region may have already been processed)
     *
     * Remove all of these reads from the queue
     * (NB: some may be associated with other active regions)
     *
     * @param sum    the walker's running reduce value
     * @param walker the walker whose map/reduce is invoked
     * @return the updated reduce value
     */
    private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
        final ActiveRegion firstRegion = workQueue.removeFirst();

        GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
        // NOTE(review): if myReads is (or becomes) empty, firstRead is null and
        // createGenomeLoc(null) will likely NPE — confirm callers guarantee reads remain.
        GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );

        while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
                (walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {

            if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
                // The region with the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
                long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
                ActiveRegion bestRegion = firstRegion;
                for( final ActiveRegion otherRegionToTest : workQueue ) {
                    // >= so that a tie is won by the later (right-most) region
                    if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
                        maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
                        bestRegion = otherRegionToTest;
                    }
                }
                bestRegion.add( firstRead );

                // The read is also added to all other regions in which it overlaps but marked as non-primary
                if( walker.wantsNonPrimaryReads() ) {
                    if( !bestRegion.equals(firstRegion) ) {
                        firstRegion.add(firstRead);
                    }
                    for( final ActiveRegion otherRegionToTest : workQueue ) {
                        if( !bestRegion.equals(otherRegionToTest) ) {
                            // check for non-primary vs. extended
                            if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
                                otherRegionToTest.add( firstRead );
                            } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
                                otherRegionToTest.add( firstRead );
                            }
                        }
                    }
                }
            // check for non-primary vs. extended
            // NOTE(review): this branch repeats the condition of the if above and is therefore
            // unreachable — it was probably meant to test getExtendedLoc(); kept as-is pending review.
            } else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
                if ( walker.wantsNonPrimaryReads() ) {
                    firstRegion.add( firstRead );
                }
            } else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
                firstRegion.add( firstRead );
            }

            myReads.removeFirst();
            firstRead = myReads.peekFirst();
            firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
        }

        logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
        final M x = walker.map( firstRegion, null );
        return walker.reduce(x, sum);
    }

    /**
     * Special function called in LinearMicroScheduler to empty out the work queue.
     * Ugly for now but will be cleaned up when we push this functionality more into the engine
     */
    public T endTraversal( final Walker<M,T> walker, T sum) {
        boolean emptyQueue = true;
        return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
    }
}

View File

@ -43,7 +43,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
public T traverse( final ActiveRegionWalker<M,T> walker,
final LocusShardDataProvider dataProvider,
T sum) {
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
final LocusView locusView = new AllLocusView(dataProvider);
@ -227,7 +227,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
final ActiveRegion activeRegion = workQueue.remove();
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
sum = processActiveRegion( activeRegion, sum, walker );
} else {
break;
}
@ -236,9 +236,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
return sum;
}
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
private T processActiveRegion( final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M,T> walker ) {
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord read : reads ) {
for( final GATKSAMRecord read : myReads ) {
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
@ -278,7 +278,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
activeRegion.add( read );
}
}
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
myReads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
// WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());

View File

@ -65,7 +65,8 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
@Override
public void progress(MapData lastProcessedMap) {
if ( lastProcessedMap.refContext != null )
printProgress(lastProcessedMap.refContext.getLocus());
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
printProgress(lastProcessedMap.refContext.getLocus().getStopLocation());
}
});
}

View File

@ -113,25 +113,39 @@ import java.util.List;
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
@PartitionBy(PartitionType.READ)
public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSchedulable {
/**
* all the command line arguments for BQSR and it's covariates
*/
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
/**
* When you have nct > 1, BQSR uses nct times more memory to compute its recalibration tables, for efficiency
* purposes. If you have many covariates, and therefore are using a lot of memory, you can use this flag
* to safely access only one table. There may be some CPU cost, but as long as the table is really big
* there should be relatively little CPU costs.
*/
@Argument(fullName = "lowMemoryMode", shortName="lowMemoryMode", doc="Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false)
public boolean lowMemoryMode = false;
@Advanced
@Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP;
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
/**
* an object that keeps track of the information necessary for quality score quantization
*/
private QuantizationInfo quantizationInfo;
private RecalibrationTables recalibrationTables;
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
/**
* list to hold the all the covariate objects that were requested (required + standard + experimental)
*/
private Covariate[] requestedCovariates;
private RecalibrationEngine recalibrationEngine;
private int minimumQToUse;
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
@ -143,7 +157,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
* Based on the covariates' estimates for initial capacity allocate the data hashmap
*/
public void initialize() {
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
// check for unsupported access
@ -185,29 +198,20 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
}
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG);
recalibrationEngine = initializeRecalibrationEngine();
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
initializeRecalibrationEngine();
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
referenceReader = getToolkit().getReferenceDataSource().getReference();
}
private RecalibrationEngine initializeRecalibrationEngine() {
/**
* Initialize the recalibration engine
*/
private void initializeRecalibrationEngine() {
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class);
try {
final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
constructor.setAccessible(true);
return (RecalibrationEngine)constructor.newInstance();
}
catch (Exception e) {
throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + recalibrationEngineClass.getSimpleName());
}
recalibrationEngine = new RecalibrationEngine(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG, lowMemoryMode);
}
private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) {
@ -501,14 +505,18 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
logger.info("Processed: " + result + " reads");
}
private RecalibrationTables getRecalibrationTable() {
return recalibrationEngine.getFinalRecalibrationTables();
}
private void generatePlots() {
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates);
}
else
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates);
}
/**
@ -517,10 +525,10 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
* generate a quantization map (recalibrated_qual -> quantized_qual)
*/
private void quantizeQualityScores() {
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
quantizationInfo = new QuantizationInfo(getRecalibrationTable(), RAC.QUANTIZING_LEVELS);
}
private void generateReport() {
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, getRecalibrationTable(), requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
}
}

View File

@ -58,7 +58,8 @@ public final class ReadRecalibrationInfo {
if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null");
if ( skips == null ) throw new IllegalArgumentException("skips cannot be null");
if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null");
// future: may allow insertionErrors && deletionErrors to be null, so don't enforce
if ( insertionErrors == null ) throw new IllegalArgumentException("insertionErrors cannot be null");
if ( deletionErrors == null ) throw new IllegalArgumentException("deletionErrors cannot be null");
this.read = read;
this.baseQuals = read.getBaseQualities();
@ -73,8 +74,8 @@ public final class ReadRecalibrationInfo {
if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + snpErrors.length + " != length " + length);
if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length);
if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
if ( insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
if ( deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
}
/**

View File

@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection {
public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable;
if(SORT_BY_ALL_COLUMNS) {
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true);
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
}

View File

@ -1,35 +1,90 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
public interface RecalibrationEngine {
import java.io.PrintStream;
import java.util.LinkedList;
import java.util.List;
public class RecalibrationEngine {
final protected Covariate[] covariates;
final private int numReadGroups;
final private PrintStream maybeLogStream;
final private boolean lowMemoryMode;
/**
* Has finalizeData() been called?
*/
private boolean finalized = false;
/**
* The final (merged, etc) recalibration tables, suitable for downstream analysis.
*/
private RecalibrationTables finalRecalibrationTables = null;
private final List<RecalibrationTables> recalibrationTablesList = new LinkedList<RecalibrationTables>();
private final ThreadLocal<RecalibrationTables> threadLocalTables = new ThreadLocal<RecalibrationTables>() {
private synchronized RecalibrationTables makeAndCaptureTable() {
final RecalibrationTables newTable = new RecalibrationTables(covariates, numReadGroups, maybeLogStream);
recalibrationTablesList.add(newTable);
return newTable;
}
@Override
protected synchronized RecalibrationTables initialValue() {
if ( lowMemoryMode ) {
return recalibrationTablesList.isEmpty() ? makeAndCaptureTable() : recalibrationTablesList.get(0);
} else {
return makeAndCaptureTable();
}
}
};
/**
* Get a recalibration table suitable for updating the underlying RecalDatums
*
* May return a thread-local version, or a single version, depending on the initialization
* arguments of this instance.
*
* @return updated tables
*/
protected RecalibrationTables getUpdatableRecalibrationTables() {
return threadLocalTables.get();
}
/**
* Initialize the recalibration engine
*
@ -40,21 +95,167 @@ public interface RecalibrationEngine {
* The engine should collect match and mismatch data into the recalibrationTables data.
*
* @param covariates an array of the covariates we'll be using in this engine, order matters
* @param recalibrationTables the destination recalibrationTables where stats should be collected
* @param numReadGroups the number of read groups we should use for the recalibration tables
* @param maybeLogStream an optional print stream for logging calls to the nestedhashmap in the recalibration tables
*/
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
public RecalibrationEngine(final Covariate[] covariates, final int numReadGroups, final PrintStream maybeLogStream, final boolean enableLowMemoryMode) {
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
if ( numReadGroups < 1 ) throw new IllegalArgumentException("numReadGroups must be >= 1 but got " + numReadGroups);
this.covariates = covariates.clone();
this.numReadGroups = numReadGroups;
this.maybeLogStream = maybeLogStream;
this.lowMemoryMode = enableLowMemoryMode;
}
/**
* Update the recalibration statistics using the information in recalInfo
* @param recalInfo data structure holding information about the recalibration values for a single read
*/
@Requires("recalInfo != null")
public void updateDataForRead(final ReadRecalibrationInfo recalInfo);
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
final RecalibrationTables tables = getUpdatableRecalibrationTables();
final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.ordinal();
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1, isError, reportedQual);
}
/**
* Finalize, if appropriate, all derived data in recalibrationTables.
*
* Called once after all calls to updateDataForRead have been issued.
*
* Assumes that all of the principal tables (by quality score) have been completely updated,
* and walks over this data to create summary data tables like by read group table.
*/
public void finalizeData();
public void finalizeData() {
if ( finalized ) throw new IllegalStateException("FinalizeData() has already been called");
// merge all of the thread-local tables
finalRecalibrationTables = mergeThreadLocalRecalibrationTables();
final NestedIntegerArray<RecalDatum> byReadGroupTable = finalRecalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> byQualTable = finalRecalibrationTables.getQualityScoreTable();
// iterate over all values in the qual table
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int eventIndex = leaf.keys[2];
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
final RecalDatum qualDatum = leaf.value;
if ( rgDatum == null ) {
// create a copy of qualDatum, and initialize byReadGroup table with it
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
} else {
// combine the qual datum with the existing datum in the byReadGroup table
rgDatum.combine(qualDatum);
}
}
finalized = true;
}
/**
* Merge all of the thread local recalibration tables into a single one.
*
* Reuses one of the recalibration tables to hold the merged table, so this function can only be
* called once in the engine.
*
* @return the merged recalibration table
*/
@Requires("! finalized")
private RecalibrationTables mergeThreadLocalRecalibrationTables() {
if ( recalibrationTablesList.isEmpty() ) throw new IllegalStateException("recalibration tables list is empty");
RecalibrationTables merged = null;
for ( final RecalibrationTables table : recalibrationTablesList ) {
if ( merged == null )
// fast path -- if there's only only one table, so just make it the merged one
merged = table;
else {
merged.combine(table);
}
}
return merged;
}
/**
* Get the final recalibration tables, after finalizeData() has been called
*
* This returns the finalized recalibration table collected by this engine.
*
* It is an error to call this function before finalizeData has been called
*
* @return the finalized recalibration table collected by this engine
*/
public RecalibrationTables getFinalRecalibrationTables() {
if ( ! finalized ) throw new IllegalStateException("Cannot get final recalibration tables until finalizeData() has been called");
return finalRecalibrationTables;
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
final byte qual,
final double isError,
final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1L, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1L, isError);
}
}
}

View File

@ -1,144 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
protected Covariate[] covariates;
protected RecalibrationTables recalibrationTables;
@Override
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null");
this.covariates = covariates.clone();
this.recalibrationTables = recalibrationTables;
}
@Override
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final EventType eventType = EventType.BASE_SUBSTITUTION;
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
final int[] keys = readCovariates.getKeySet(offset, eventType);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index);
}
}
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1L, isError, reportedQual);
}
/**
* Create derived recalibration data tables
*
* Assumes that all of the principal tables (by quality score) have been completely updated,
* and walks over this data to create summary data tables like by read group table.
*/
@Override
public void finalizeData() {
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
// iterate over all values in the qual table
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int eventIndex = leaf.keys[2];
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
final RecalDatum qualDatum = leaf.value;
if ( rgDatum == null ) {
// create a copy of qualDatum, and initialize byReadGroup table with it
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
} else {
// combine the qual datum with the existing datum in the byReadGroup table
rgDatum.combine(qualDatum);
}
}
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
final byte qual,
final double isError,
final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1, isError);
}
}
}

View File

@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
public void initialize() {
report = new GATKReport();
report.addTable(reportName, reportDescription, 6, true, false);
report.addTable(reportName, reportDescription, 6);
table = report.getTable(reportName);
table.addColumn("readgroup");
table.addColumn("cycle");

View File

@ -162,7 +162,7 @@ public class VariantEvalReportWriter {
// create the table
final String tableName = ve.getSimpleName();
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false);
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()));
// grab the table, and add the columns we need to it
final GATKReportTable table = report.getTable(tableName);

View File

@ -26,6 +26,11 @@ public class ActiveRegion implements HasGenomeLocation {
private final GenomeLocParser genomeLocParser;
public final boolean isActive;
// maximum stop position of all reads with start position in this active region
// Used only by ExperimentalReadShardTraverseActiveRegions
// NB: these reads may not be associated with this active region!
private int maxReadStop;
public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) {
this.activeRegionLoc = activeRegionLoc;
this.isActive = isActive;
@ -33,6 +38,7 @@ public class ActiveRegion implements HasGenomeLocation {
this.extension = extension;
extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
fullExtentReferenceLoc = extendedLoc;
maxReadStop = activeRegionLoc.getStart();
}
@Override
@ -93,6 +99,18 @@ public class ActiveRegion implements HasGenomeLocation {
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
public void setMaxReadStop(int maxReadStop) {
this.maxReadStop = maxReadStop;
}
public int getMaxReadStop() {
return maxReadStop;
}
public int getExtendedMaxReadStop() {
return maxReadStop + extension;
}
public boolean equalExceptReads(final ActiveRegion other) {
if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false;
if ( isActive != other.isActive ) return false;

View File

@ -0,0 +1,14 @@
package org.broadinstitute.sting.utils.activeregion;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 1/2/13
* Time: 4:59 PM
* To change this template use File | Settings | File Templates.
*/
public enum ExperimentalActiveRegionShardType {
LOCUSSHARD, // default/legacy type
READSHARD,
ACTIVEREGIONSHARD
}

View File

@ -160,6 +160,13 @@ public class ProgressMeter {
public ProgressMeter(final File performanceLogFile,
final String processingUnitName,
final GenomeLocSortedSet processingIntervals) {
this(performanceLogFile, processingUnitName, processingIntervals, ProgressMeterDaemon.DEFAULT_POLL_FREQUENCY_MILLISECONDS);
}
protected ProgressMeter(final File performanceLogFile,
final String processingUnitName,
final GenomeLocSortedSet processingIntervals,
final long pollingFrequency) {
if ( processingUnitName == null ) throw new IllegalArgumentException("processingUnitName cannot be null");
if ( processingIntervals == null ) throw new IllegalArgumentException("Target intervals cannot be null");
@ -184,10 +191,14 @@ public class ProgressMeter {
targetSizeInBP = processingIntervals.coveredSize();
// start up the timer
progressMeterDaemon = new ProgressMeterDaemon(this);
progressMeterDaemon = new ProgressMeterDaemon(this, pollingFrequency);
start();
}
public ProgressMeterDaemon getProgressMeterDaemon() {
return progressMeterDaemon;
}
/**
* Start up the progress meter, printing initialization message and starting up the
* daemon thread for periodic printing.
@ -223,11 +234,13 @@ public class ProgressMeter {
* the progress itself. A separate printing daemon periodically polls the meter to print out
* progress
*
* @param loc Current location, can be null if you are at the end of the processing unit
* @param loc Current location, can be null if you are at the end of the processing unit. Must
* have size == 1 (cannot be multiple bases in size).
* @param nTotalRecordsProcessed the total number of records we've processed
*/
public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc);
// weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup)
this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));

View File

@ -8,10 +8,20 @@ package org.broadinstitute.sting.utils.progressmeter;
* Time: 9:16 PM
*/
public final class ProgressMeterDaemon extends Thread {
public final static long DEFAULT_POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
/**
* How frequently should we poll and print progress?
*/
private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
private final long pollFrequencyMilliseconds;
/**
* How long are we waiting between print progress calls are issued?
* @return the time in milliseconds between progress meter calls
*/
private long getPollFrequencyMilliseconds() {
return pollFrequencyMilliseconds;
}
/**
* Are we to continue periodically printing status, or should we shut down?
@ -27,13 +37,20 @@ public final class ProgressMeterDaemon extends Thread {
* Create a new ProgressMeterDaemon printing progress for meter
* @param meter the progress meter to print progress of
*/
public ProgressMeterDaemon(final ProgressMeter meter) {
public ProgressMeterDaemon(final ProgressMeter meter, final long pollFrequencyMilliseconds) {
if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
if ( pollFrequencyMilliseconds <= 0 ) throw new IllegalArgumentException("pollFrequencyMilliseconds must be greater than 0 but got " + pollFrequencyMilliseconds);
this.meter = meter;
this.pollFrequencyMilliseconds = pollFrequencyMilliseconds;
setDaemon(true);
setName("ProgressMeterDaemon");
}
public ProgressMeterDaemon(final ProgressMeter meter) {
this(meter, DEFAULT_POLL_FREQUENCY_MILLISECONDS);
}
/**
* Tells this daemon thread to shutdown at the next opportunity, as the progress
* metering is complete.
@ -42,6 +59,14 @@ public final class ProgressMeterDaemon extends Thread {
this.done = true;
}
/**
* Is this daemon thread done?
* @return true if done, false otherwise
*/
public boolean isDone() {
return done;
}
/**
* Start up the ProgressMeterDaemon, polling every tens of seconds to print, if
* necessary, the provided progress meter. Never exits until the JVM is complete,
@ -51,7 +76,7 @@ public final class ProgressMeterDaemon extends Thread {
while (! done) {
meter.printProgress(false);
try {
Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
Thread.sleep(getPollFrequencyMilliseconds());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}

View File

@ -198,7 +198,7 @@ public class BaseRecalibration {
}
private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) {
final Double cached = globalDeltaQs.get(rgKey, errorModel.index);
final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateGlobalDeltaQ(rgKey, errorModel);
@ -210,7 +210,7 @@ public class BaseRecalibration {
}
private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) {
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index);
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey);
@ -240,7 +240,7 @@ public class BaseRecalibration {
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
double result = 0.0;
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index);
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
if (empiricalQualRG != null) {
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
@ -254,7 +254,7 @@ public class BaseRecalibration {
private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
double result = 0.0;
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index);
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal());
if (empiricalQualQS != null) {
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
@ -287,7 +287,7 @@ public class BaseRecalibration {
final double globalDeltaQ,
final double deltaQReported,
final byte qualFromRead) {
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index);
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal());
if (empiricalQualCO != null) {
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported);

View File

@ -1,41 +1,39 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
public enum EventType {
BASE_SUBSTITUTION(0, "M", "Base Substitution"),
BASE_INSERTION(1, "I", "Base Insertion"),
BASE_DELETION(2, "D", "Base Deletion");
BASE_SUBSTITUTION("M", "Base Substitution"),
BASE_INSERTION("I", "Base Insertion"),
BASE_DELETION("D", "Base Deletion");
public final int index;
private final String representation;
private final String longRepresentation;
private EventType(int index, String representation, String longRepresentation) {
this.index = index;
private EventType(String representation, String longRepresentation) {
this.representation = representation;
this.longRepresentation = longRepresentation;
}
/**
* Get the EventType corresponding to its ordinal index
* @param index an ordinal index
* @return the event type corresponding to ordinal index
*/
public static EventType eventFrom(int index) {
switch (index) {
case 0:
return BASE_SUBSTITUTION;
case 1:
return BASE_INSERTION;
case 2:
return BASE_DELETION;
default:
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
}
return EventType.values()[index];
}
public static EventType eventFrom(String event) {
/**
* Get the EventType with short string representation
* @throws IllegalArgumentException if representation doesn't correspond to one of EventType
* @param representation short string representation of the event
* @return an EventType
*/
public static EventType eventFrom(String representation) {
for (EventType eventType : EventType.values())
if (eventType.representation.equals(event))
if (eventType.representation.equals(representation))
return eventType;
throw new ReviewedStingException(String.format("Event %s does not exist.", event));
throw new IllegalArgumentException(String.format("Event %s does not exist.", representation));
}
@Override

View File

@ -67,10 +67,10 @@ public class QuantizationInfo {
return quantizationLevels;
}
public GATKReportTable generateReportTable(boolean sortBycols) {
public GATKReportTable generateReportTable(boolean sortByCols) {
GATKReportTable quantizedTable;
if(sortBycols) {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true);
if(sortByCols) {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
}

View File

@ -68,9 +68,9 @@ public class ReadCovariates {
* @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
*/
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
}
/**
@ -81,11 +81,11 @@ public class ReadCovariates {
* @return
*/
public int[] getKeySet(final int readPosition, final EventType errorModel) {
return keys[errorModel.index][readPosition];
return keys[errorModel.ordinal()][readPosition];
}
public int[][] getKeySet(final EventType errorModel) {
return keys[errorModel.index];
return keys[errorModel.ordinal()];
}
// ----------------------------------------------------------------------
@ -94,17 +94,9 @@ public class ReadCovariates {
//
// ----------------------------------------------------------------------
protected int[][] getMismatchesKeySet() {
return keys[EventType.BASE_SUBSTITUTION.index];
}
protected int[][] getInsertionsKeySet() {
return keys[EventType.BASE_INSERTION.index];
}
protected int[][] getDeletionsKeySet() {
return keys[EventType.BASE_DELETION.index];
}
protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); }
protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); }
protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); }
protected int[] getMismatchesKeySet(final int readPosition) {
return getKeySet(readPosition, EventType.BASE_SUBSTITUTION);

View File

@ -287,7 +287,7 @@ public class RecalUtils {
final GATKReportTable reportTable;
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
if(sortByCols) {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true);
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
}

View File

@ -133,7 +133,7 @@ public class RecalibrationReport {
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempCOVarray[3] = event.index;
tempCOVarray[3] = event.ordinal();
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
}
@ -152,7 +152,7 @@ public class RecalibrationReport {
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempQUALarray[2] = event.index;
tempQUALarray[2] = event.ordinal();
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
}
@ -169,7 +169,7 @@ public class RecalibrationReport {
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempRGarray[1] = event.index;
tempRGarray[1] = event.ordinal();
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
}

View File

@ -123,12 +123,16 @@ public final class RecalibrationTables {
}
/**
* Merge in the quality score table information from qualityScoreTable into this
* recalibration table's quality score table.
*
* @param qualityScoreTable the quality score table we want to merge in
* Merge all of the tables from toMerge into into this set of tables
*/
public void combineQualityScoreTable(final NestedIntegerArray<RecalDatum> qualityScoreTable) {
RecalUtils.combineTables(getQualityScoreTable(), qualityScoreTable);
public void combine(final RecalibrationTables toMerge) {
if ( numTables() != toMerge.numTables() )
throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes");
for ( int i = 0; i < numTables(); i++ ) {
final NestedIntegerArray<RecalDatum> myTable = this.getTable(i);
final NestedIntegerArray<RecalDatum> otherTable = toMerge.getTable(i);
RecalUtils.combineTables(myTable, otherTable);
}
}
}

View File

@ -32,6 +32,13 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Random;
import java.io.FileInputStream;
import java.io.DataInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
public class GATKReportUnitTest extends BaseTest {
@Test
@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest {
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
}
private GATKReportTable getTableWithRandomValues() {
Random number = new Random(123L);
final int VALUESRANGE = 10;
GATKReport report = GATKReport.newSimpleReport("TableName", "col1", "col2", "col3");
GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN );
final int NUMROWS = 100;
for (int x = 0; x < NUMROWS; x++) {
report.addRow(number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE));
}
return table;
}
@Test(enabled = true)
public void testSortingByColumn() {
Assert.assertEquals(isSorted(getTableWithRandomValues()), true);
}
private boolean isSorted(GATKReportTable table) {
boolean result = true;
File testingSortingTableFile = new File("testSortingFile.txt");
try {
// Connect print stream to the output stream
PrintStream ps = new PrintStream(testingSortingTableFile);
table.write(ps);
ps.close();
}
catch (Exception e){
System.err.println ("Error: " + e.getMessage());
}
ArrayList<int[]> rows = new ArrayList<int[]>();
try {
// Open the file
FileInputStream fStream = new FileInputStream(testingSortingTableFile);
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fStream);
BufferedReader br = new BufferedReader(new InputStreamReader(in));
String strLine;
//Read File Line By Line
while ((strLine = br.readLine()) != null) {
String[] parts = strLine.split(" ");
int l = parts.length;
int[] row = new int[l];
for(int n = 0; n < l; n++) {
row[n] = Integer.parseInt(parts[n]);
}
rows.add(row);
}
//Close the input stream
in.close();
} catch (Exception e){//Catch exception if any
System.err.println("Error: " + e.getMessage());
}
for (int x = 1; x < rows.size() && result; x++) {
result = checkRowOrder(rows.get(x - 1), rows.get(x));
}
return result;
}
private boolean checkRowOrder(int[] row1, int[] row2) {
int l = row1.length;
final int EQUAL = 0;
int result = EQUAL;
for(int x = 0; x < l && ( result <= EQUAL); x++) {
result = ((Integer)row1[x]).compareTo(row2[x]);
}
if (result <= EQUAL) {
return true;
} else {
return false;
}
}
private GATKReportTable makeBasicTable() {
GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
GATKReportTable table = report.getTable("TableName");
@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest {
table.set("RZ", "SomeFloat", 535646345.657453464576);
table.set("RZ", "TrueFalse", true);
report1.addTable("Table3", "blah", 1, true, false);
report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW);
report1.getTable("Table3").addColumn("a");
report1.getTable("Table3").addRowIDMapping("q", 2);
report1.getTable("Table3").addRowIDMapping("5", 3);

View File

@ -3,10 +3,16 @@ package org.broadinstitute.sting.gatk.traversals;
import com.google.java.contract.PreconditionError;
import net.sf.samtools.*;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -15,7 +21,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -28,6 +33,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.TestException;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -95,7 +101,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
}
}
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
private final TraverseActiveRegions<Integer, Integer> traverse = new TraverseActiveRegions<Integer, Integer>();
private final ExperimentalReadShardTraverseActiveRegions<Integer, Integer> readShardTraverse = new ExperimentalReadShardTraverseActiveRegions<Integer, Integer>();
private final ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer> activeRegionShardTraverse = new ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer>();
private IndexedFastaSequenceFile reference;
private SAMSequenceDictionary dictionary;
@ -106,21 +114,22 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
private static final String testBAM = "TraverseActiveRegionsUnitTest.bam";
private static final String testBAI = "TraverseActiveRegionsUnitTest.bai";
private static final ExperimentalActiveRegionShardType shardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
@BeforeClass
private void init() throws FileNotFoundException {
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
// TODO: test shard boundaries
// TODO: reads with indels
// TODO: reads which span many regions
// TODO: reads which are partially between intervals (in/outside extension)
// TODO: duplicate reads
// TODO: read at the end of a contig
// TODO: reads which are completely outside intervals but within extension
// TODO: test the extension itself
// TODO: unmapped reads
intervals = new ArrayList<GenomeLoc>();
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
@ -142,6 +151,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
reads.add(buildSAMRecord("shard_boundary_1_pre", "1", 16300, 16385));
reads.add(buildSAMRecord("shard_boundary_1_post", "1", 16384, 16400));
reads.add(buildSAMRecord("shard_boundary_equal", "1", 16355, 16414));
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
createBAM(reads);
@ -153,7 +165,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
File indexFile = new File(testBAI);
indexFile.deleteOnExit();
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile);
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, outFile);
for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) {
out.addAlignment(read);
}
@ -171,8 +183,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
t.traverse(walker, dataProvider, 0);
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
traverse(walker, dataProvider, 0);
activeIntervals.addAll(walker.isActiveCalls);
}
@ -272,6 +284,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -286,6 +301,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@ -309,6 +330,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -323,6 +347,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@ -347,6 +377,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -361,6 +394,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@ -382,10 +421,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
}
private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
t.traverse(walker, dataProvider, 0);
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
traverse(walker, dataProvider, 0);
t.endTraversal(walker, 0);
endTraversal(walker, 0);
return walker.mappedActiveRegions;
}
@ -429,6 +468,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
header.setSequenceDictionary(dictionary);
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(readName);
@ -445,10 +485,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
return record;
}
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
private List<ShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine);
GATKArgumentCollection arguments = new GATKArgumentCollection();
arguments.activeRegionShardType = shardType; // make explicit
engine.setArguments(arguments);
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags());
@ -456,13 +498,65 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser);
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
List<ShardDataProvider> providers = new ArrayList<ShardDataProvider>();
switch (shardType) {
case LOCUSSHARD:
traverse.initialize(engine);
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
}
break;
case READSHARD:
readShardTraverse.initialize(engine);
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ReadShardBalancer())) {
providers.add(new ReadShardDataProvider(shard, genomeLocParser, shard.iterator(), reference, new ArrayList<ReferenceOrderedDataSource>()));
}
break;
case ACTIVEREGIONSHARD:
activeRegionShardTraverse.initialize(engine);
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) {
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
providers.add(new ActiveRegionShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, shard.iterator(), window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
}
break;
default: throw new TestException("Invalid shard type");
}
return providers;
}
private void traverse(DummyActiveRegionWalker walker, ShardDataProvider dataProvider, int i) {
switch (shardType) {
case LOCUSSHARD:
traverse.traverse(walker, (LocusShardDataProvider) dataProvider, i);
break;
case READSHARD:
readShardTraverse.traverse(walker, (ReadShardDataProvider) dataProvider, i);
break;
case ACTIVEREGIONSHARD:
activeRegionShardTraverse.traverse(walker, (ActiveRegionShardDataProvider) dataProvider, i);
break;
default: throw new TestException("Invalid shard type");
}
}
private void endTraversal(DummyActiveRegionWalker walker, int i) {
switch (shardType) {
case LOCUSSHARD:
traverse.endTraversal(walker, i);
break;
case READSHARD:
readShardTraverse.endTraversal(walker, i);
break;
case ACTIVEREGIONSHARD:
activeRegionShardTraverse.endTraversal(walker, i);
break;
default: throw new TestException("Invalid shard type");
}
}
}

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
public final class ReadRecalibrationInfoUnitTest extends BaseTest {
@DataProvider(name = "InfoProvider")
public Object[][] createCombineTablesProvider() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int readLength: Arrays.asList(10, 100, 1000) ) {
for ( final boolean includeIndelErrors : Arrays.asList(true, false) ) {
tests.add(new Object[]{readLength, includeIndelErrors});
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "InfoProvider")
public void testReadInfo(final int readLength, final boolean includeIndelErrors) {
final ReadCovariates covariates = new ReadCovariates(readLength, 2);
final byte[] bases = new byte[readLength];
final byte[] baseQuals = new byte[readLength];
final byte[] insertionQuals = new byte[readLength];
final byte[] deletionQuals = new byte[readLength];
final boolean[] skips = new boolean[readLength];
final double[] snpErrors = new double[readLength];
final double[] insertionErrors = new double[readLength];
final double[] deletionsErrors = new double[readLength];
for ( int i = 0; i < readLength; i++ ) {
bases[i] = 'A';
baseQuals[i] = (byte)(i % SAMUtils.MAX_PHRED_SCORE);
insertionQuals[i] = (byte)((i+1) % SAMUtils.MAX_PHRED_SCORE);
deletionQuals[i] = (byte)((i+2) % SAMUtils.MAX_PHRED_SCORE);
skips[i] = i % 2 == 0;
snpErrors[i] = 1.0 / (i+1);
insertionErrors[i] = 0.5 / (i+1);
deletionsErrors[i] = 0.3 / (i+1);
}
final EnumMap<EventType, double[]> errors = new EnumMap<EventType, double[]>(EventType.class);
errors.put(EventType.BASE_SUBSTITUTION, snpErrors);
errors.put(EventType.BASE_INSERTION, insertionErrors);
errors.put(EventType.BASE_DELETION, deletionsErrors);
final EnumMap<EventType, byte[]> quals = new EnumMap<EventType, byte[]>(EventType.class);
quals.put(EventType.BASE_SUBSTITUTION, baseQuals);
quals.put(EventType.BASE_INSERTION, insertionQuals);
quals.put(EventType.BASE_DELETION, deletionQuals);
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, baseQuals, readLength + "M");
if ( includeIndelErrors ) {
read.setBaseQualities(insertionQuals, EventType.BASE_INSERTION);
read.setBaseQualities(deletionQuals, EventType.BASE_DELETION);
}
final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skips, snpErrors, insertionErrors, deletionsErrors);
Assert.assertEquals(info.getCovariatesValues(), covariates);
Assert.assertEquals(info.getRead(), read);
for ( int i = 0; i < readLength; i++ ) {
Assert.assertEquals(info.skip(i), skips[i]);
for ( final EventType et : EventType.values() ) {
Assert.assertEquals(info.getErrorFraction(et, i), errors.get(et)[i]);
final byte expectedQual = et == EventType.BASE_SUBSTITUTION || includeIndelErrors ? quals.get(et)[i]: GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL;
Assert.assertEquals(info.getQual(et, i), expectedQual);
}
}
}
}

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.progressmeter;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
* UnitTests for the ProgressMeterDaemon
*
* User: depristo
* Date: 8/24/12
* Time: 11:25 AM
* To change this template use File | Settings | File Templates.
*/
public class ProgressMeterDaemonUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
@BeforeClass
public void init() throws FileNotFoundException {
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
}
// capture and count calls to progress
private class TestingProgressMeter extends ProgressMeter {
final List<Long> progressCalls = new LinkedList<Long>();
private TestingProgressMeter(final long poll) {
super(null, "test", new GenomeLocSortedSet(genomeLocParser), poll);
}
@Override
protected synchronized void printProgress(boolean mustPrint) {
progressCalls.add(System.currentTimeMillis());
}
}
@DataProvider(name = "PollingData")
public Object[][] makePollingData() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int ticks : Arrays.asList(1, 5, 10) ) {
for ( final int poll : Arrays.asList(10, 100) ) {
tests.add(new Object[]{poll, ticks});
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "PollingData", invocationCount = 10, successPercentage = 90)
public void testMe(final long poll, final int ticks) throws InterruptedException {
final TestingProgressMeter meter = new TestingProgressMeter(poll);
final ProgressMeterDaemon daemon = meter.getProgressMeterDaemon();
Assert.assertTrue(daemon.isDaemon());
Assert.assertFalse(daemon.isDone());
Thread.sleep(ticks * poll);
Assert.assertFalse(daemon.isDone());
daemon.done();
Assert.assertTrue(daemon.isDone());
Assert.assertEquals(meter.progressCalls.size(), ticks,
"Expected " + ticks + " progress calls from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls);
}
}

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.progressmeter;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* UnitTests for the ProgressMeterData
*
* User: depristo
* Date: 8/24/12
* Time: 11:25 AM
* To change this template use File | Settings | File Templates.
*/
public class ProgressMeterDataUnitTest extends BaseTest {
    @Test
    public void testBasic() {
        // each getter must echo back exactly what the constructor was given
        Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getElapsedSeconds(), 1.0);
        Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getUnitsProcessed(), 2);
        Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getBpProcessed(), 3);
    }

    @Test
    public void testFraction() {
        final double TOLERANCE = 1e-3;
        // fraction completed = bpProcessed / target size
        Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(10), 0.1, TOLERANCE);
        Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(10), 0.2, TOLERANCE);
        Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(100), 0.01, TOLERANCE);
        Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(100), 0.02, TOLERANCE);
        // a zero-size target must not divide by zero; it reports fully complete
        Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(0), 1.0, TOLERANCE);
    }

    @Test
    public void testSecondsPerBP() {
        final double TOLERANCE = 1e-3;
        final long MILLION = 1000000;
        // rate scales linearly with elapsed time and inversely with bp processed
        Assert.assertEquals(new ProgressMeterData(1.0, 1, MILLION).secondsPerMillionBP(), 1.0, TOLERANCE);
        Assert.assertEquals(new ProgressMeterData(1.0, 1, MILLION / 10).secondsPerMillionBP(), 10.0, TOLERANCE);
        Assert.assertEquals(new ProgressMeterData(2.0, 1, MILLION).secondsPerMillionBP(), 2.0, TOLERANCE);
        // zero bp processed must not divide by zero
        Assert.assertEquals(new ProgressMeterData(1.0, 1, 0).secondsPerMillionBP(), 1e6, TOLERANCE);
    }

    @Test
    public void testSecondsPerElement() {
        final double TOLERANCE = 1e-3;
        final long MILLION = 1000000;
        // same rate math as secondsPerMillionBP, but keyed off units processed
        Assert.assertEquals(new ProgressMeterData(1.0, MILLION, 1).secondsPerMillionElements(), 1.0, TOLERANCE);
        Assert.assertEquals(new ProgressMeterData(1.0, MILLION / 10, 1).secondsPerMillionElements(), 10.0, TOLERANCE);
        Assert.assertEquals(new ProgressMeterData(2.0, MILLION, 1).secondsPerMillionElements(), 2.0, TOLERANCE);
        // zero units processed must not divide by zero
        Assert.assertEquals(new ProgressMeterData(1.0, 0, 1).secondsPerMillionElements(), 1e6, TOLERANCE);
    }
}

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.*;
public final class EventTypeUnitTest extends BaseTest {
    @Test
    public void testEventTypes() {
        for ( final EventType eventType : EventType.values() ) {
            // both representations must exist and be non-empty
            Assert.assertNotNull(eventType.toString());
            Assert.assertNotNull(eventType.prettyPrint());
            Assert.assertFalse("".equals(eventType.toString()));
            Assert.assertFalse("".equals(eventType.prettyPrint()));
            // every event must round-trip through both its ordinal and its string form
            Assert.assertEquals(EventType.eventFrom(eventType.ordinal()), eventType);
            Assert.assertEquals(EventType.eventFrom(eventType.toString()), eventType);
        }
    }

    @Test
    public void testEventTypesEnumItself() {
        // Set.add returns false on a duplicate, so a false here means two
        // event types share the same short representation
        final Set<String> shortReps = new HashSet<String>();
        for ( final EventType eventType : EventType.values() )
            Assert.assertTrue(shortReps.add(eventType.toString()), "Short representative for EventType has duplicates for " + eventType);
        Assert.assertEquals(shortReps.size(), EventType.values().length, "Short representatives for EventType aren't unique");
    }

    @Test(expectedExceptions = IllegalArgumentException.class)
    public void testBadString() {
        // an unrecognized name must be rejected, not silently mapped
        EventType.eventFrom("asdfhalsdjfalkjsdf");
    }
}

View File

@ -20,9 +20,7 @@ public class RecalibrationReportUnitTest {
/**
 * Creates a RecalDatum with a random number of observations and errors.
 *
 * Observations are drawn uniformly from [0, maxObservations); the error count is
 * capped at the observation count so the datum is always internally consistent
 * (you can never observe more errors than observations). Capping with Math.min
 * replaces the old rejection-sampling loop, which could re-draw many times.
 *
 * NOTE(review): the visible text contained both the pre-merge rejection loop and
 * the post-merge Math.min line (an unresolved diff), declaring nErrors twice;
 * this body keeps only the resolved, post-merge implementation.
 *
 * @param maxObservations exclusive upper bound on the observation count
 * @param maxErrors       exclusive upper bound on the error count, before capping
 * @return a freshly constructed random RecalDatum
 */
private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
    final Random random = new Random();
    final int nObservations = random.nextInt(maxObservations);
    // cap rather than reject: guarantees nErrors <= nObservations in one draw
    final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
    final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
    return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
}
@ -92,14 +90,14 @@ public class RecalibrationReportUnitTest {
final int[] covariates = rc.getKeySet(offset, errorMode);
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index);
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
nKeys += 2;
for (int j = 0; j < optionalCovariates.size(); j++) {
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j];
if ( covValue >= 0 ) {
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.index);
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
nKeys++;
}
}