auto-merge

This commit is contained in:
Ryan Poplin 2013-01-04 12:21:03 -05:00
commit 96efa8b19f
22 changed files with 731 additions and 372 deletions

View File

@ -1,99 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.LinkedList;
import java.util.List;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class);
final List<NestedIntegerArray<RecalDatum>> allThreadLocalQualityScoreTables = new LinkedList<NestedIntegerArray<RecalDatum>>();
private ThreadLocal<NestedIntegerArray<RecalDatum>> threadLocalQualityScoreTables = new ThreadLocal<NestedIntegerArray<RecalDatum>>() {
@Override
protected synchronized NestedIntegerArray<RecalDatum> initialValue() {
final NestedIntegerArray<RecalDatum> table = recalibrationTables.makeQualityScoreTable();
allThreadLocalQualityScoreTables.add(table);
return table;
}
};
@Override
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
final NestedIntegerArray<RecalDatum> qualityScoreTable = getThreadLocalQualityScoreTable();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.index;
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
}
}
/**
* Get a NestedIntegerArray for a QualityScore table specific to this thread
* @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate
*/
private NestedIntegerArray<RecalDatum> getThreadLocalQualityScoreTable() {
return threadLocalQualityScoreTables.get();
}
@Override
public void finalizeData() {
// merge in all of the thread local tables
logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables");
for ( final NestedIntegerArray<RecalDatum> localTable : allThreadLocalQualityScoreTables ) {
recalibrationTables.combineQualityScoreTable(localTable);
}
allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves
super.finalizeData();
}
}

View File

@ -65,7 +65,8 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
@Override
public void progress(MapData lastProcessedMap) {
if ( lastProcessedMap.refContext != null )
printProgress(lastProcessedMap.refContext.getLocus());
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
printProgress(lastProcessedMap.refContext.getLocus().getStopLocation());
}
});
}

View File

@ -113,25 +113,39 @@ import java.util.List;
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
@PartitionBy(PartitionType.READ)
public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSchedulable {
/**
* all the command line arguments for BQSR and it's covariates
*/
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
/**
* When you have nct > 1, BQSR uses nct times more memory to compute its recalibration tables, for efficiency
* purposes. If you have many covariates, and therefore are using a lot of memory, you can use this flag
* to safely access only one table. There may be some CPU cost, but as long as the table is really big
* there should be relatively little CPU costs.
*/
@Argument(fullName = "lowMemoryMode", shortName="lowMemoryMode", doc="Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false)
public boolean lowMemoryMode = false;
@Advanced
@Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP;
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
/**
* an object that keeps track of the information necessary for quality score quantization
*/
private QuantizationInfo quantizationInfo;
private RecalibrationTables recalibrationTables;
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
/**
* list to hold the all the covariate objects that were requested (required + standard + experimental)
*/
private Covariate[] requestedCovariates;
private RecalibrationEngine recalibrationEngine;
private int minimumQToUse;
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
@ -143,7 +157,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
* Based on the covariates' estimates for initial capacity allocate the data hashmap
*/
public void initialize() {
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
// check for unsupported access
@ -185,29 +198,20 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
}
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG);
recalibrationEngine = initializeRecalibrationEngine();
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
initializeRecalibrationEngine();
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
referenceReader = getToolkit().getReferenceDataSource().getReference();
}
private RecalibrationEngine initializeRecalibrationEngine() {
/**
* Initialize the recalibration engine
*/
private void initializeRecalibrationEngine() {
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class);
try {
final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
constructor.setAccessible(true);
return (RecalibrationEngine)constructor.newInstance();
}
catch (Exception e) {
throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + recalibrationEngineClass.getSimpleName());
}
recalibrationEngine = new RecalibrationEngine(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG, lowMemoryMode);
}
private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) {
@ -501,14 +505,18 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
logger.info("Processed: " + result + " reads");
}
private RecalibrationTables getRecalibrationTable() {
return recalibrationEngine.getFinalRecalibrationTables();
}
private void generatePlots() {
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates);
}
else
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates);
}
/**
@ -517,10 +525,10 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
* generate a quantization map (recalibrated_qual -> quantized_qual)
*/
private void quantizeQualityScores() {
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
quantizationInfo = new QuantizationInfo(getRecalibrationTable(), RAC.QUANTIZING_LEVELS);
}
private void generateReport() {
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, getRecalibrationTable(), requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
}
}

View File

@ -58,7 +58,8 @@ public final class ReadRecalibrationInfo {
if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null");
if ( skips == null ) throw new IllegalArgumentException("skips cannot be null");
if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null");
// future: may allow insertionErrors && deletionErrors to be null, so don't enforce
if ( insertionErrors == null ) throw new IllegalArgumentException("insertionErrors cannot be null");
if ( deletionErrors == null ) throw new IllegalArgumentException("deletionErrors cannot be null");
this.read = read;
this.baseQuals = read.getBaseQualities();
@ -73,8 +74,8 @@ public final class ReadRecalibrationInfo {
if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + snpErrors.length + " != length " + length);
if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length);
if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
if ( insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
if ( deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
}
/**

View File

@ -1,35 +1,90 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
public interface RecalibrationEngine {
import java.io.PrintStream;
import java.util.LinkedList;
import java.util.List;
public class RecalibrationEngine {
final protected Covariate[] covariates;
final private int numReadGroups;
final private PrintStream maybeLogStream;
final private boolean lowMemoryMode;
/**
* Has finalizeData() been called?
*/
private boolean finalized = false;
/**
* The final (merged, etc) recalibration tables, suitable for downstream analysis.
*/
private RecalibrationTables finalRecalibrationTables = null;
private final List<RecalibrationTables> recalibrationTablesList = new LinkedList<RecalibrationTables>();
private final ThreadLocal<RecalibrationTables> threadLocalTables = new ThreadLocal<RecalibrationTables>() {
private synchronized RecalibrationTables makeAndCaptureTable() {
final RecalibrationTables newTable = new RecalibrationTables(covariates, numReadGroups, maybeLogStream);
recalibrationTablesList.add(newTable);
return newTable;
}
@Override
protected synchronized RecalibrationTables initialValue() {
if ( lowMemoryMode ) {
return recalibrationTablesList.isEmpty() ? makeAndCaptureTable() : recalibrationTablesList.get(0);
} else {
return makeAndCaptureTable();
}
}
};
/**
* Get a recalibration table suitable for updating the underlying RecalDatums
*
* May return a thread-local version, or a single version, depending on the initialization
* arguments of this instance.
*
* @return
*/
protected RecalibrationTables getUpdatableRecalibrationTables() {
return threadLocalTables.get();
}
/**
* Initialize the recalibration engine
*
@ -40,21 +95,167 @@ public interface RecalibrationEngine {
* The engine should collect match and mismatch data into the recalibrationTables data.
*
* @param covariates an array of the covariates we'll be using in this engine, order matters
* @param recalibrationTables the destination recalibrationTables where stats should be collected
* @param numReadGroups the number of read groups we should use for the recalibration tables
* @param maybeLogStream an optional print stream for logging calls to the nestedhashmap in the recalibration tables
*/
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
public RecalibrationEngine(final Covariate[] covariates, final int numReadGroups, final PrintStream maybeLogStream, final boolean enableLowMemoryMode) {
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
if ( numReadGroups < 1 ) throw new IllegalArgumentException("numReadGroups must be >= 1 but got " + numReadGroups);
this.covariates = covariates.clone();
this.numReadGroups = numReadGroups;
this.maybeLogStream = maybeLogStream;
this.lowMemoryMode = enableLowMemoryMode;
}
/**
* Update the recalibration statistics using the information in recalInfo
* @param recalInfo data structure holding information about the recalibration values for a single read
*/
@Requires("recalInfo != null")
public void updateDataForRead(final ReadRecalibrationInfo recalInfo);
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
final RecalibrationTables tables = getUpdatableRecalibrationTables();
final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.ordinal();
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1, isError, reportedQual);
}
/**
* Finalize, if appropriate, all derived data in recalibrationTables.
*
* Called once after all calls to updateDataForRead have been issued.
*
* Assumes that all of the principal tables (by quality score) have been completely updated,
* and walks over this data to create summary data tables like by read group table.
*/
public void finalizeData();
public void finalizeData() {
if ( finalized ) throw new IllegalStateException("FinalizeData() has already been called");
// merge all of the thread-local tables
finalRecalibrationTables = mergeThreadLocalRecalibrationTables();
final NestedIntegerArray<RecalDatum> byReadGroupTable = finalRecalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> byQualTable = finalRecalibrationTables.getQualityScoreTable();
// iterate over all values in the qual table
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int eventIndex = leaf.keys[2];
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
final RecalDatum qualDatum = leaf.value;
if ( rgDatum == null ) {
// create a copy of qualDatum, and initialize byReadGroup table with it
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
} else {
// combine the qual datum with the existing datum in the byReadGroup table
rgDatum.combine(qualDatum);
}
}
finalized = true;
}
/**
* Merge all of the thread local recalibration tables into a single one.
*
* Reuses one of the recalibration tables to hold the merged table, so this function can only be
* called once in the engine.
*
* @return the merged recalibration table
*/
@Requires("! finalized")
private RecalibrationTables mergeThreadLocalRecalibrationTables() {
if ( recalibrationTablesList.isEmpty() ) throw new IllegalStateException("recalibration tables list is empty");
RecalibrationTables merged = null;
for ( final RecalibrationTables table : recalibrationTablesList ) {
if ( merged == null )
// fast path -- if there's only only one table, so just make it the merged one
merged = table;
else {
merged.combine(table);
}
}
return merged;
}
/**
* Get the final recalibration tables, after finalizeData() has been called
*
* This returns the finalized recalibration table collected by this engine.
*
* It is an error to call this function before finalizeData has been called
*
* @return the finalized recalibration table collected by this engine
*/
public RecalibrationTables getFinalRecalibrationTables() {
if ( ! finalized ) throw new IllegalStateException("Cannot get final recalibration tables until finalizeData() has been called");
return finalRecalibrationTables;
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
final byte qual,
final double isError,
final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1.0, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1.0, isError);
}
}
}

View File

@ -1,144 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
protected Covariate[] covariates;
protected RecalibrationTables recalibrationTables;
@Override
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null");
this.covariates = covariates.clone();
this.recalibrationTables = recalibrationTables;
}
@Override
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final EventType eventType = EventType.BASE_SUBSTITUTION;
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
final int[] keys = readCovariates.getKeySet(offset, eventType);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index);
}
}
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1, isError, reportedQual);
}
/**
* Create derived recalibration data tables
*
* Assumes that all of the principal tables (by quality score) have been completely updated,
* and walks over this data to create summary data tables like by read group table.
*/
@Override
public void finalizeData() {
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
// iterate over all values in the qual table
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int eventIndex = leaf.keys[2];
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
final RecalDatum qualDatum = leaf.value;
if ( rgDatum == null ) {
// create a copy of qualDatum, and initialize byReadGroup table with it
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
} else {
// combine the qual datum with the existing datum in the byReadGroup table
rgDatum.combine(qualDatum);
}
}
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
final byte qual,
final double isError,
final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1.0, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1.0, isError);
}
}
}

View File

@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
public void initialize() {
report = new GATKReport();
report.addTable(reportName, reportDescription, 6, GATKReportTable.TableSortingWay.SORT_BY_ROW);
report.addTable(reportName, reportDescription, 6);
table = report.getTable(reportName);
table.addColumn("readgroup");
table.addColumn("cycle");

View File

@ -162,7 +162,7 @@ public class VariantEvalReportWriter {
// create the table
final String tableName = ve.getSimpleName();
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), GATKReportTable.TableSortingWay.SORT_BY_ROW);
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()));
// grab the table, and add the columns we need to it
final GATKReportTable table = report.getTable(tableName);

View File

@ -160,6 +160,13 @@ public class ProgressMeter {
public ProgressMeter(final File performanceLogFile,
final String processingUnitName,
final GenomeLocSortedSet processingIntervals) {
this(performanceLogFile, processingUnitName, processingIntervals, ProgressMeterDaemon.DEFAULT_POLL_FREQUENCY_MILLISECONDS);
}
protected ProgressMeter(final File performanceLogFile,
final String processingUnitName,
final GenomeLocSortedSet processingIntervals,
final long pollingFrequency) {
if ( processingUnitName == null ) throw new IllegalArgumentException("processingUnitName cannot be null");
if ( processingIntervals == null ) throw new IllegalArgumentException("Target intervals cannot be null");
@ -184,10 +191,14 @@ public class ProgressMeter {
targetSizeInBP = processingIntervals.coveredSize();
// start up the timer
progressMeterDaemon = new ProgressMeterDaemon(this);
progressMeterDaemon = new ProgressMeterDaemon(this, pollingFrequency);
start();
}
public ProgressMeterDaemon getProgressMeterDaemon() {
return progressMeterDaemon;
}
/**
* Start up the progress meter, printing initialization message and starting up the
* daemon thread for periodic printing.
@ -223,11 +234,13 @@ public class ProgressMeter {
* the progress itself. A separate printing daemon periodically polls the meter to print out
* progress
*
* @param loc Current location, can be null if you are at the end of the processing unit
* @param loc Current location, can be null if you are at the end of the processing unit. Must
* have size == 1 (cannot be multiple bases in size).
* @param nTotalRecordsProcessed the total number of records we've processed
*/
public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc);
// weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup)
this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));

View File

@ -8,10 +8,20 @@ package org.broadinstitute.sting.utils.progressmeter;
* Time: 9:16 PM
*/
public final class ProgressMeterDaemon extends Thread {
public final static long DEFAULT_POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
/**
* How frequently should we poll and print progress?
*/
private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
private final long pollFrequencyMilliseconds;
/**
* How long are we waiting between print progress calls are issued?
* @return the time in milliseconds between progress meter calls
*/
private long getPollFrequencyMilliseconds() {
return pollFrequencyMilliseconds;
}
/**
* Are we to continue periodically printing status, or should we shut down?
@ -27,13 +37,20 @@ public final class ProgressMeterDaemon extends Thread {
* Create a new ProgressMeterDaemon printing progress for meter
* @param meter the progress meter to print progress of
*/
public ProgressMeterDaemon(final ProgressMeter meter) {
public ProgressMeterDaemon(final ProgressMeter meter, final long pollFrequencyMilliseconds) {
if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
if ( pollFrequencyMilliseconds <= 0 ) throw new IllegalArgumentException("pollFrequencyMilliseconds must be greater than 0 but got " + pollFrequencyMilliseconds);
this.meter = meter;
this.pollFrequencyMilliseconds = pollFrequencyMilliseconds;
setDaemon(true);
setName("ProgressMeterDaemon");
}
public ProgressMeterDaemon(final ProgressMeter meter) {
this(meter, DEFAULT_POLL_FREQUENCY_MILLISECONDS);
}
/**
* Tells this daemon thread to shutdown at the next opportunity, as the progress
* metering is complete.
@ -42,6 +59,14 @@ public final class ProgressMeterDaemon extends Thread {
this.done = true;
}
/**
* Is this daemon thread done?
* @return true if done, false otherwise
*/
public boolean isDone() {
return done;
}
/**
* Start up the ProgressMeterDaemon, polling every tens of seconds to print, if
* necessary, the provided progress meter. Never exits until the JVM is complete,
@ -51,7 +76,7 @@ public final class ProgressMeterDaemon extends Thread {
while (! done) {
meter.printProgress(false);
try {
Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
Thread.sleep(getPollFrequencyMilliseconds());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}

View File

@ -198,7 +198,7 @@ public class BaseRecalibration {
}
private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) {
final Double cached = globalDeltaQs.get(rgKey, errorModel.index);
final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateGlobalDeltaQ(rgKey, errorModel);
@ -210,7 +210,7 @@ public class BaseRecalibration {
}
private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) {
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index);
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey);
@ -240,7 +240,7 @@ public class BaseRecalibration {
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
double result = 0.0;
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index);
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
if (empiricalQualRG != null) {
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
@ -254,7 +254,7 @@ public class BaseRecalibration {
private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
double result = 0.0;
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index);
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal());
if (empiricalQualQS != null) {
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
@ -287,7 +287,7 @@ public class BaseRecalibration {
final double globalDeltaQ,
final double deltaQReported,
final byte qualFromRead) {
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index);
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal());
if (empiricalQualCO != null) {
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported);

View File

@ -1,41 +1,39 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
public enum EventType {
BASE_SUBSTITUTION(0, "M", "Base Substitution"),
BASE_INSERTION(1, "I", "Base Insertion"),
BASE_DELETION(2, "D", "Base Deletion");
BASE_SUBSTITUTION("M", "Base Substitution"),
BASE_INSERTION("I", "Base Insertion"),
BASE_DELETION("D", "Base Deletion");
public final int index;
private final String representation;
private final String longRepresentation;
private EventType(int index, String representation, String longRepresentation) {
this.index = index;
private EventType(String representation, String longRepresentation) {
this.representation = representation;
this.longRepresentation = longRepresentation;
}
/**
* Get the EventType corresponding to its ordinal index
* @param index an ordinal index
* @return the event type corresponding to ordinal index
*/
public static EventType eventFrom(int index) {
switch (index) {
case 0:
return BASE_SUBSTITUTION;
case 1:
return BASE_INSERTION;
case 2:
return BASE_DELETION;
default:
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
}
return EventType.values()[index];
}
public static EventType eventFrom(String event) {
/**
* Get the EventType with short string representation
* @throws IllegalArgumentException if representation doesn't correspond to one of EventType
* @param representation short string representation of the event
* @return an EventType
*/
public static EventType eventFrom(String representation) {
for (EventType eventType : EventType.values())
if (eventType.representation.equals(event))
if (eventType.representation.equals(representation))
return eventType;
throw new ReviewedStingException(String.format("Event %s does not exist.", event));
throw new IllegalArgumentException(String.format("Event %s does not exist.", representation));
}
@Override

View File

@ -67,9 +67,9 @@ public class QuantizationInfo {
return quantizationLevels;
}
public GATKReportTable generateReportTable(boolean sortBycols) {
public GATKReportTable generateReportTable(boolean sortByCols) {
GATKReportTable quantizedTable;
if(sortBycols) {
if(sortByCols) {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);

View File

@ -68,9 +68,9 @@ public class ReadCovariates {
* @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
*/
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
}
/**
@ -81,11 +81,11 @@ public class ReadCovariates {
* @return
*/
public int[] getKeySet(final int readPosition, final EventType errorModel) {
return keys[errorModel.index][readPosition];
return keys[errorModel.ordinal()][readPosition];
}
public int[][] getKeySet(final EventType errorModel) {
return keys[errorModel.index];
return keys[errorModel.ordinal()];
}
// ----------------------------------------------------------------------
@ -94,17 +94,9 @@ public class ReadCovariates {
//
// ----------------------------------------------------------------------
protected int[][] getMismatchesKeySet() {
return keys[EventType.BASE_SUBSTITUTION.index];
}
protected int[][] getInsertionsKeySet() {
return keys[EventType.BASE_INSERTION.index];
}
protected int[][] getDeletionsKeySet() {
return keys[EventType.BASE_DELETION.index];
}
protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); }
protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); }
protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); }
protected int[] getMismatchesKeySet(final int readPosition) {
return getKeySet(readPosition, EventType.BASE_SUBSTITUTION);

View File

@ -142,7 +142,7 @@ public class RecalibrationReport {
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempCOVarray[3] = event.index;
tempCOVarray[3] = event.ordinal();
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
}
@ -161,7 +161,7 @@ public class RecalibrationReport {
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempQUALarray[2] = event.index;
tempQUALarray[2] = event.ordinal();
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
}
@ -178,7 +178,7 @@ public class RecalibrationReport {
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempRGarray[1] = event.index;
tempRGarray[1] = event.ordinal();
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
}

View File

@ -123,12 +123,16 @@ public final class RecalibrationTables {
}
/**
* Merge in the quality score table information from qualityScoreTable into this
* recalibration table's quality score table.
*
* @param qualityScoreTable the quality score table we want to merge in
* Merge all of the tables from toMerge into into this set of tables
*/
public void combineQualityScoreTable(final NestedIntegerArray<RecalDatum> qualityScoreTable) {
RecalUtils.combineTables(getQualityScoreTable(), qualityScoreTable);
public void combine(final RecalibrationTables toMerge) {
if ( numTables() != toMerge.numTables() )
throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes");
for ( int i = 0; i < numTables(); i++ ) {
final NestedIntegerArray<RecalDatum> myTable = this.getTable(i);
final NestedIntegerArray<RecalDatum> otherTable = toMerge.getTable(i);
RecalUtils.combineTables(myTable, otherTable);
}
}
}

View File

@ -105,7 +105,7 @@ public class GATKReportUnitTest extends BaseTest {
private boolean isSorted(GATKReportTable table) {
boolean result = true;
File testingSortingTableFile = new File("myFile.txt");
File testingSortingTableFile = new File("testSortingFile.txt");
try {
// Connect print stream to the output stream

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
public final class ReadRecalibrationInfoUnitTest extends BaseTest {
@DataProvider(name = "InfoProvider")
public Object[][] createCombineTablesProvider() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int readLength: Arrays.asList(10, 100, 1000) ) {
for ( final boolean includeIndelErrors : Arrays.asList(true, false) ) {
tests.add(new Object[]{readLength, includeIndelErrors});
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "InfoProvider")
public void testReadInfo(final int readLength, final boolean includeIndelErrors) {
final ReadCovariates covariates = new ReadCovariates(readLength, 2);
final byte[] bases = new byte[readLength];
final byte[] baseQuals = new byte[readLength];
final byte[] insertionQuals = new byte[readLength];
final byte[] deletionQuals = new byte[readLength];
final boolean[] skips = new boolean[readLength];
final double[] snpErrors = new double[readLength];
final double[] insertionErrors = new double[readLength];
final double[] deletionsErrors = new double[readLength];
for ( int i = 0; i < readLength; i++ ) {
bases[i] = 'A';
baseQuals[i] = (byte)(i % SAMUtils.MAX_PHRED_SCORE);
insertionQuals[i] = (byte)((i+1) % SAMUtils.MAX_PHRED_SCORE);
deletionQuals[i] = (byte)((i+2) % SAMUtils.MAX_PHRED_SCORE);
skips[i] = i % 2 == 0;
snpErrors[i] = 1.0 / (i+1);
insertionErrors[i] = 0.5 / (i+1);
deletionsErrors[i] = 0.3 / (i+1);
}
final EnumMap<EventType, double[]> errors = new EnumMap<EventType, double[]>(EventType.class);
errors.put(EventType.BASE_SUBSTITUTION, snpErrors);
errors.put(EventType.BASE_INSERTION, insertionErrors);
errors.put(EventType.BASE_DELETION, deletionsErrors);
final EnumMap<EventType, byte[]> quals = new EnumMap<EventType, byte[]>(EventType.class);
quals.put(EventType.BASE_SUBSTITUTION, baseQuals);
quals.put(EventType.BASE_INSERTION, insertionQuals);
quals.put(EventType.BASE_DELETION, deletionQuals);
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, baseQuals, readLength + "M");
if ( includeIndelErrors ) {
read.setBaseQualities(insertionQuals, EventType.BASE_INSERTION);
read.setBaseQualities(deletionQuals, EventType.BASE_DELETION);
}
final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skips, snpErrors, insertionErrors, deletionsErrors);
Assert.assertEquals(info.getCovariatesValues(), covariates);
Assert.assertEquals(info.getRead(), read);
for ( int i = 0; i < readLength; i++ ) {
Assert.assertEquals(info.skip(i), skips[i]);
for ( final EventType et : EventType.values() ) {
Assert.assertEquals(info.getErrorFraction(et, i), errors.get(et)[i]);
final byte expectedQual = et == EventType.BASE_SUBSTITUTION || includeIndelErrors ? quals.get(et)[i]: GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL;
Assert.assertEquals(info.getQual(et, i), expectedQual);
}
}
}
}

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.progressmeter;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
* UnitTests for the ProgressMeterDaemon
*
* User: depristo
* Date: 8/24/12
* Time: 11:25 AM
* To change this template use File | Settings | File Templates.
*/
public class ProgressMeterDaemonUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
@BeforeClass
public void init() throws FileNotFoundException {
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
}
// capture and count calls to progress
private class TestingProgressMeter extends ProgressMeter {
final List<Long> progressCalls = new LinkedList<Long>();
private TestingProgressMeter(final long poll) {
super(null, "test", new GenomeLocSortedSet(genomeLocParser), poll);
}
@Override
protected synchronized void printProgress(boolean mustPrint) {
progressCalls.add(System.currentTimeMillis());
}
}
@DataProvider(name = "PollingData")
public Object[][] makePollingData() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int ticks : Arrays.asList(1, 5, 10) ) {
for ( final int poll : Arrays.asList(10, 100) ) {
tests.add(new Object[]{poll, ticks});
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "PollingData", invocationCount = 10, successPercentage = 90)
public void testMe(final long poll, final int ticks) throws InterruptedException {
final TestingProgressMeter meter = new TestingProgressMeter(poll);
final ProgressMeterDaemon daemon = meter.getProgressMeterDaemon();
Assert.assertTrue(daemon.isDaemon());
Assert.assertFalse(daemon.isDone());
Thread.sleep(ticks * poll);
Assert.assertFalse(daemon.isDone());
daemon.done();
Assert.assertTrue(daemon.isDone());
Assert.assertEquals(meter.progressCalls.size(), ticks,
"Expected " + ticks + " progress calls from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls);
}
}

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.progressmeter;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* UnitTests for the ProgressMeterData
*
* User: depristo
* Date: 8/24/12
* Time: 11:25 AM
* To change this template use File | Settings | File Templates.
*/
public class ProgressMeterDataUnitTest extends BaseTest {
@Test
public void testBasic() {
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getElapsedSeconds(), 1.0);
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getUnitsProcessed(), 2);
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getBpProcessed(), 3);
}
@Test
public void testFraction() {
final double TOL = 1e-3;
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(10), 0.1, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(10), 0.2, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(100), 0.01, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(100), 0.02, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(0), 1.0, TOL);
}
@Test
public void testSecondsPerBP() {
final double TOL = 1e-3;
final long M = 1000000;
Assert.assertEquals(new ProgressMeterData(1.0, 1, M).secondsPerMillionBP(), 1.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, M/10).secondsPerMillionBP(), 10.0, TOL);
Assert.assertEquals(new ProgressMeterData(2.0, 1, M).secondsPerMillionBP(), 2.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 0).secondsPerMillionBP(), 1e6, TOL);
}
@Test
public void testSecondsPerElement() {
final double TOL = 1e-3;
final long M = 1000000;
Assert.assertEquals(new ProgressMeterData(1.0, M, 1).secondsPerMillionElements(), 1.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, M/10, 1).secondsPerMillionElements(), 10.0, TOL);
Assert.assertEquals(new ProgressMeterData(2.00, M, 1).secondsPerMillionElements(), 2.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 0, 1).secondsPerMillionElements(), 1e6, TOL);
}
}

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.*;
public final class EventTypeUnitTest extends BaseTest {
@Test
public void testEventTypes() {
for ( final EventType et : EventType.values() ) {
Assert.assertNotNull(et.toString());
Assert.assertNotNull(et.prettyPrint());
Assert.assertFalse("".equals(et.toString()));
Assert.assertFalse("".equals(et.prettyPrint()));
Assert.assertEquals(EventType.eventFrom(et.ordinal()), et);
Assert.assertEquals(EventType.eventFrom(et.toString()), et);
}
}
@Test
public void testEventTypesEnumItself() {
final Set<String> shortReps = new HashSet<String>();
for ( final EventType et : EventType.values() ) {
Assert.assertFalse(shortReps.contains(et.toString()), "Short representative for EventType has duplicates for " + et);
shortReps.add(et.toString());
}
Assert.assertEquals(shortReps.size(), EventType.values().length, "Short representatives for EventType aren't unique");
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBadString() {
EventType.eventFrom("asdfhalsdjfalkjsdf");
}
}

View File

@ -20,7 +20,7 @@ public class RecalibrationReportUnitTest {
private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
}
@ -90,14 +90,14 @@ public class RecalibrationReportUnitTest {
final int[] covariates = rc.getKeySet(offset, errorMode);
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index);
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
nKeys += 2;
for (int j = 0; j < optionalCovariates.size(); j++) {
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j];
if ( covValue >= 0 ) {
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.index);
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
nKeys++;
}
}