BitSet keys to lower BQSR's memory footprint

Infrastructure:
	* Generic BitSet implementation with any precision (up to long)
	* Two's complement implementation of the bit set handles negative numbers (cycle covariate)
	* Memoized implementation of the BitSet utils for better performance.
	* All exponents are now calculated with bit shifts, fixing numerical precision issues with the double Math.pow.
	* Replace log/sqrt with bitwise logic to get rid of numerical issues

 BQSR:
	* All covariates output BitSets and have the functionality to decode them back into Object values.
	* Covariates are responsible for determining the size of the key they will use (number of bits).
	* Generalized KeyManager implementation combines any arbitrary number of covariates into one bitset key with event type
	* No more NestedHashMaps. Single key system now fits in one hash to reduce hash table objects overhead

 Tests:
	* Unit tests added to every method of BitSetUtils
	* Unit tests added to the generalized key system infrastructure of BQSRv2 (KeyManager)
	* Unit tests added to the cycle and context covariates (will add unit tests to all covariates)
This commit is contained in:
Mauricio Carneiro 2012-03-05 17:37:34 -05:00
parent e86ce8f3d6
commit ca11ab39e7
19 changed files with 868 additions and 457 deletions

View File

@ -26,7 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -43,7 +43,10 @@ public class ContextCovariate implements StandardCovariate {
private int mismatchesContextSize;
private int insertionsContextSize;
private int deletionsContextSize;
private int deletionsContextSize;
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
@ -62,7 +65,7 @@ public class ContextCovariate implements StandardCovariate {
int l = read.getReadLength();
BitSet[] mismatches = new BitSet[l];
BitSet[] insertions = new BitSet[l];
BitSet[] deletions = new BitSet[l];
BitSet[] deletions = new BitSet[l];
final boolean negativeStrand = read.getReadNegativeStrandFlag();
byte[] bases = read.getReadBases();
@ -72,7 +75,7 @@ public class ContextCovariate implements StandardCovariate {
for (int i = 0; i < read.getReadLength(); i++) {
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
insertions[i] = contextWith(bases, i, insertionsContextSize);
deletions[i] = contextWith(bases, i, deletionsContextSize);
deletions[i] = contextWith(bases, i, deletionsContextSize);
}
if (negativeStrand) {
@ -89,24 +92,35 @@ public class ContextCovariate implements StandardCovariate {
return str;
}
@Override
public String keyFromBitSet(BitSet key) {
if (key.equals(NO_CONTEXT_BITSET))
return NO_CONTEXT_VALUE;
return BitSetUtils.dnaFrom(key);
}
@Override
public int numberOfBits() {
return Long.bitCount(-1L);
}
/**
* calculates the context of a base independent of the covariate mode
* calculates the context of a base independent of the covariate mode (mismatch, insertion or deletion)
*
* @param bases the bases in the read to build the context from
* @param offset the position in the read to calculate the context for
* @param contextSize context size to use building the context
* @return
* @param bases the bases in the read to build the context from
* @param offset the position in the read to calculate the context for
* @param contextSize context size to use building the context
* @return the bitSet representing the Context
*/
private BitSet contextWith(byte [] bases, int offset, int contextSize) {
if (offset < contextSize)
return null;
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (context.contains("N"))
return null;
return MathUtils.bitSetFrom(context);
}
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
BitSet result = NO_CONTEXT_BITSET;
if (offset >= contextSize) {
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (!context.contains("N"))
result = BitSetUtils.bitSetFrom(context);
}
return result;
}
/**
* Reverses the given array in place.

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.BitSet;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -53,7 +55,29 @@ public interface Covariate {
*/
public CovariateValues getValues(GATKSAMRecord read);
public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
/**
* Used to get the covariate's value from input csv file during on-the-fly recalibration
*
* @param str the key in string type (read from the csv)
* @return the key in its correct type.
*/
public Object getValue(String str);
/**
* Converts the bitset representation of the key (used internally for table indexing) to String format for file output.
*
* @param key the bitset representation of the key
* @return a string representation of the key
*/
public String keyFromBitSet(BitSet key);
/**
* Each covariate should determine how many bits are necessary to encode its data
*
* @return The number of bits used to represent the values of this covariate.
*/
public int numberOfBits();
}
interface RequiredCovariate extends Covariate {}

View File

@ -2,87 +2,107 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.BitSet;
import java.util.HashMap;
/**
* The object temporarily held by a read that describes all of it's covariates.
*
* The object temporarily held by a read that describes all of its covariates.
*
* In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap
*
* @author Mauricio Carneiro
* @since 2/8/12
*/
public class CovariateKeySet {
private Object[][] mismatchesKeySet;
private Object[][] insertionsKeySet;
private Object[][] deletionsKeySet;
private BitSet[][] mismatchesKeySet;
private BitSet[][] insertionsKeySet;
private BitSet[][] deletionsKeySet;
private int nextCovariateIndex;
private static String mismatchesCovariateName = "M";
private static String insertionsCovariateName = "I";
private static String deletionsCovariateName = "D";
// private static String mismatchesCovariateName = "M";
// private static String insertionsCovariateName = "I";
// private static String deletionsCovariateName = "D";
//
// private static BitSet mismatchesCovariateBitSet = BitSetUtils.bitSetFrom(0);
// private static BitSet insertionsCovariateBitSet = BitSetUtils.bitSetFrom(1);
// private static BitSet deletionsCovariateBitSet = BitSetUtils.bitSetFrom(2);
private static HashMap<String, RecalDataManager.BaseRecalibrationType> nameToType = new HashMap<String, RecalDataManager.BaseRecalibrationType>();
private static HashMap<BitSet, String> bitSetToName = new HashMap<BitSet, String>();
public CovariateKeySet(int readLength, int numberOfCovariates) {
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
this.mismatchesKeySet = new Object[readLength][numberOfCovariates];
this.insertionsKeySet = new Object[readLength][numberOfCovariates];
this.deletionsKeySet = new Object[readLength][numberOfCovariates];
initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateName);
initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateName);
initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateName);
// numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
// initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateBitSet);
// initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateBitSet);
// initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateBitSet);
this.nextCovariateIndex = 0;
// nameToType.put(mismatchesCovariateName, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION);
// nameToType.put(insertionsCovariateName, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
// nameToType.put(deletionsCovariateName, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
//
// bitSetToName.put(BitSetUtils.bitSetFrom(0), mismatchesCovariateName);
// bitSetToName.put(BitSetUtils.bitSetFrom(1), insertionsCovariateName);
// bitSetToName.put(BitSetUtils.bitSetFrom(2), deletionsCovariateName);
}
public void addCovariate(CovariateValues covariate) {
transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
nextCovariateIndex++;
}
public static RecalDataManager.BaseRecalibrationType getErrorModelFromString(final String modelString) {
if (modelString.equals(mismatchesCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION;
else if (modelString.equals(insertionsCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_INSERTION;
else if (modelString.equals(deletionsCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_DELETION;
throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
public static RecalDataManager.BaseRecalibrationType errorModelFrom(final String modelString) {
if (!nameToType.containsKey(modelString))
throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
return nameToType.get(modelString);
}
public Object[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
public static String eventNameFrom(final BitSet bitSet) {
if (!bitSetToName.containsKey(bitSet))
throw new ReviewedStingException("Unrecognized Event Type BitSet: " + bitSet);
return bitSetToName.get(bitSet);
}
public BitSet[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
switch (errorModel) {
case BASE_SUBSTITUTION:
return getMismatchesKeySet(readPosition);
return getMismatchesKeySet(readPosition);
case BASE_INSERTION:
return getInsertionsKeySet(readPosition);
return getInsertionsKeySet(readPosition);
case BASE_DELETION:
return getDeletionsKeySet(readPosition);
return getDeletionsKeySet(readPosition);
default:
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel);
}
}
public Object[] getMismatchesKeySet(int readPosition) {
public BitSet[] getMismatchesKeySet(int readPosition) {
return mismatchesKeySet[readPosition];
}
public Object[] getInsertionsKeySet(int readPosition) {
public BitSet[] getInsertionsKeySet(int readPosition) {
return insertionsKeySet[readPosition];
}
public Object[] getDeletionsKeySet(int readPosition) {
public BitSet[] getDeletionsKeySet(int readPosition) {
return deletionsKeySet[readPosition];
}
private void transposeCovariateValues (Object [][] keySet, Object [] covariateValues) {
for (int i=0; i<covariateValues.length; i++)
keySet[i][nextCovariateIndex] = covariateValues[i];
private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
for (int i = 0; i < covariateValues.length; i++)
keySet[i][nextCovariateIndex] = covariateValues[i];
}
private void initializeCovariateKeySet (Object[][] keySet, String covariateName) {
private void initializeCovariateKeySet(BitSet[][] keySet, BitSet covariateName) {
int readLength = keySet.length;
int lastCovariateIndex = keySet[0].length - 1;
for (int i = 0; i < readLength; i++)
for (int i = 0; i < readLength; i++)
keySet[i][lastCovariateIndex] = covariateName;
}
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import java.util.BitSet;
/**
* An object to hold the different covariate values for all bases in the read.
*
@ -12,25 +14,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* @since 2/8/12
*/
public class CovariateValues {
private Object[] mismatches;
private Object[] insertions;
private Object[] deletions;
private BitSet[] mismatches;
private BitSet[] insertions;
private BitSet[] deletions;
public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
this.mismatches = mismatch;
this.insertions = insertion;
this.deletions = deletion;
}
public Object[] getMismatches() {
public BitSet[] getMismatches() {
return mismatches;
}
public Object[] getInsertions() {
public BitSet[] getInsertions() {
return insertions;
}
public Object[] getDeletions() {
public BitSet[] getDeletions() {
return deletions;
}

View File

@ -1,10 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.BitSet;
import java.util.EnumSet;
/*
@ -59,13 +61,13 @@ public class CycleCovariate implements StandardCovariate {
// Used to pick out the covariate's value from attributes of the read
@Override
public CovariateValues getValues(final GATKSAMRecord read) {
Integer [] cycles = new Integer[read.getReadLength()];
BitSet[] cycles = new BitSet[read.getReadLength()];
final NGSPlatform ngsPlatform = read.getNGSPlatform();
// Discrete cycle platforms
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
final int init;
final int increment;
final short init;
final short increment;
if (!read.getReadNegativeStrandFlag()) {
// Differentiate between first and second of pair.
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
@ -88,19 +90,19 @@ public class CycleCovariate implements StandardCovariate {
else {
if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
//second of pair, negative strand
init = -read.getReadLength();
init = (short) -read.getReadLength();
increment = 1;
}
else {
//first of pair, negative strand
init = read.getReadLength();
init = (short) read.getReadLength();
increment = -1;
}
}
int cycle = init;
short cycle = init;
for (int i = 0; i < read.getReadLength(); i++) {
cycles[i] = cycle;
cycles[i] = BitSetUtils.bitSetFrom(cycle);
cycle += increment;
}
}
@ -119,7 +121,7 @@ public class CycleCovariate implements StandardCovariate {
// the current sequential model would consider the effects independently instead of jointly.
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
int cycle = multiplyByNegative1 ? -1 : 1;
short cycle = multiplyByNegative1 ? (short) -1 : 1;
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
@ -127,19 +129,19 @@ public class CycleCovariate implements StandardCovariate {
int iii = 0;
while (iii < readLength) {
while (iii < readLength && bases[iii] == (byte) 'T') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
while (iii < readLength && bases[iii] == (byte) 'A') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
while (iii < readLength && bases[iii] == (byte) 'C') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
while (iii < readLength && bases[iii] == (byte) 'G') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
if (iii < readLength) {
@ -149,7 +151,7 @@ public class CycleCovariate implements StandardCovariate {
cycle++;
}
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
@ -159,19 +161,19 @@ public class CycleCovariate implements StandardCovariate {
int iii = readLength - 1;
while (iii >= 0) {
while (iii >= 0 && bases[iii] == (byte) 'T') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
while (iii >= 0 && bases[iii] == (byte) 'A') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
while (iii >= 0 && bases[iii] == (byte) 'C') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
while (iii >= 0 && bases[iii] == (byte) 'G') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
if (iii >= 0) {
@ -181,7 +183,7 @@ public class CycleCovariate implements StandardCovariate {
cycle++;
}
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
}
@ -192,7 +194,7 @@ public class CycleCovariate implements StandardCovariate {
else {
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
}
return new CovariateValues(cycles, cycles, cycles);
}
@ -201,4 +203,14 @@ public class CycleCovariate implements StandardCovariate {
public final Object getValue(final String str) {
return Integer.parseInt(str);
}
@Override
public String keyFromBitSet(BitSet key) {
return String.format("%d", BitSetUtils.shortFrom(key));
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
}
}

View File

@ -1,7 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.BitSet;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -37,6 +40,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class QualityScoreCovariate implements RequiredCovariate {
private final int MAX_QUAL = 50;
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
@ -46,18 +51,18 @@ public class QualityScoreCovariate implements RequiredCovariate {
public CovariateValues getValues(final GATKSAMRecord read) {
int readLength = read.getReadLength();
Integer [] mismatches = new Integer[readLength];
Integer [] insertions = new Integer[readLength];
Integer [] deletions = new Integer[readLength];
BitSet[] mismatches = new BitSet[readLength];
BitSet[] insertions = new BitSet[readLength];
BitSet[] deletions = new BitSet[readLength];
byte [] baseQualities = read.getBaseQualities();
byte [] baseInsertionQualities = read.getBaseInsertionQualities();
byte [] baseDeletionQualities = read.getBaseDeletionQualities();
byte[] baseQualities = read.getBaseQualities();
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
byte[] baseDeletionQualities = read.getBaseDeletionQualities();
for (int i=0; i<baseQualities.length; i++) {
mismatches[i] = (int) baseQualities[i];
insertions[i] = (int) baseInsertionQualities[i];
deletions[i] = (int) baseDeletionQualities[i];
for (int i = 0; i < baseQualities.length; i++) {
mismatches[i] = BitSetUtils.bitSetFrom(baseQualities[i]);
insertions[i] = BitSetUtils.bitSetFrom(baseInsertionQualities[i]);
deletions[i] = BitSetUtils.bitSetFrom(baseDeletionQualities[i]);
}
return new CovariateValues(mismatches, insertions, deletions);
@ -68,4 +73,14 @@ public class QualityScoreCovariate implements RequiredCovariate {
public final Object getValue(final String str) {
return Integer.parseInt(str);
}
@Override
public String keyFromBitSet(BitSet key) {
return String.format("%d", BitSetUtils.longFrom(key));
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(MAX_QUAL);
}
}

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
/*
@ -39,7 +41,7 @@ import java.util.HashMap;
*/
public class ReadGroupCovariate implements RequiredCovariate {
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
private short nextId = 0;
@ -54,7 +56,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
final int l = read.getReadLength();
final String readGroupId = read.getReadGroup().getReadGroupId();
short shortId;
if (readGroupLookupTable.containsKey(readGroupId))
if (readGroupLookupTable.containsKey(readGroupId))
shortId = readGroupLookupTable.get(readGroupId);
else {
shortId = nextId;
@ -62,8 +64,9 @@ public class ReadGroupCovariate implements RequiredCovariate {
readGroupReverseLookupTable.put(nextId, readGroupId);
nextId++;
}
Short [] readGroups = new Short[l];
Arrays.fill(readGroups, shortId);
BitSet rg = BitSetUtils.bitSetFrom(shortId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
BitSet[] readGroups = new BitSet[l];
Arrays.fill(readGroups, rg);
return new CovariateValues(readGroups, readGroups, readGroups);
}
@ -72,10 +75,20 @@ public class ReadGroupCovariate implements RequiredCovariate {
public final Object getValue(final String str) {
return str;
}
@Override
public String keyFromBitSet(BitSet key) {
return decodeReadGroup((short) BitSetUtils.longFrom(key));
}
public final String decodeReadGroup(final short id) {
return readGroupReverseLookupTable.get(id);
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
}
}

View File

@ -58,18 +58,44 @@ public class RecalDataManager {
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullPlatform = false;
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
public enum BaseRecalibrationType {
BASE_SUBSTITUTION,
BASE_INSERTION,
BASE_DELETION
BASE_SUBSTITUTION(0, "M"),
BASE_INSERTION(1, "I"),
BASE_DELETION(2, "D");
public int index;
public String representation;
private BaseRecalibrationType(int index, String representation) {
this.index = index;
this.representation = representation;
}
public static BaseRecalibrationType eventFrom(int index) {
switch (index) {
case 0:
return BASE_SUBSTITUTION;
case 1:
return BASE_INSERTION;
case 2:
return BASE_DELETION;
default:
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
}
}
@Override
public String toString() {
return representation;
}
}
public enum SOLID_RECAL_MODE {
@ -119,7 +145,7 @@ public class RecalDataManager {
dataCollapsedReadGroup = new HashMap<BaseRecalibrationType, NestedHashMap>();
dataCollapsedQualityScore = new HashMap<BaseRecalibrationType, NestedHashMap>();
dataCollapsedByCovariate = new HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>>();
for ( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
@ -136,10 +162,10 @@ public class RecalDataManager {
}
}
public static CovariateKeySet getAllCovariateValuesFor(GATKSAMRecord read) {
public static CovariateKeySet covariateKeySetFrom(GATKSAMRecord read) {
return (CovariateKeySet) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
}
/**
* Add the given mapping to all of the collapsed hash tables
*
@ -147,7 +173,7 @@ public class RecalDataManager {
* @param fullDatum The RecalDatum which is the data for this mapping
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
*/
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel ) {
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel) {
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
//data.put(key, thisDatum); // add the mapping to the main table
@ -208,7 +234,7 @@ public class RecalDataManager {
*/
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
for( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual);
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual);
for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) {
@ -551,6 +577,7 @@ public class RecalDataManager {
/**
* Given the base and the color calculate the next base in the sequence
*
* @param read the read
* @param prevBase The base
* @param color The color
* @return The next base in the sequence
@ -615,11 +642,12 @@ public class RecalDataManager {
* Computes all requested covariates for every offset in the given read
* by calling covariate.getValues(..).
*
* It populates an array of covariate values where result[i][j] is the covariate
* value for the ith position in the read and the jth covariate in
* requestedCovariates list.
*
* @param read The read for which to compute covariate values.
* @param requestedCovariates The list of requested covariates.
* @return An array of covariate values where result[i][j] is the covariate
* value for the ith position in the read and the jth covariate in
* requestedCovariates list.
*/
public static void computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
final int numRequestedCovariates = requestedCovariates.size();

View File

@ -94,7 +94,7 @@ public class RecalDatumOptimized {
public final double empiricalQualDouble(final int smoothing, final double maxQual) {
final double doubleMismatches = (double) (numMismatches + smoothing);
final double doubleObservations = (double) (numObservations + smoothing);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, maxQual);
}
@ -106,9 +106,10 @@ public class RecalDatumOptimized {
public final byte empiricalQualByte() {
return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
}
}
public final String outputToCSV() {
@Override
public final String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -92,16 +91,6 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
protected boolean RUN_WITHOUT_DBSNP = false;
/////////////////////////////
// protected Member Variables
/////////////////////////////
protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
/**
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
* reads which have had the reference inserted because of color space inconsistencies.
@ -153,7 +142,6 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
public byte DELETIONS_DEFAULT_QUALITY = 45;
@Hidden
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = null;
@ -161,5 +149,4 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
}

View File

@ -0,0 +1,284 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.util.BitSet;
/**
 * Utilities for bitset conversion.
 *
 * Converts between long/short integers (negative values included, via two's complement),
 * DNA context strings and their {@link java.util.BitSet} representations.
 *
 * @author Mauricio Carneiro
 * @since 3/5/12
 */
public class BitSetUtils {
    static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
    static final private byte NBITS_LONG_REPRESENTATION = 64; // the number of bits used in the long version to represent the bit set (necessary for the two's complement representation of negative numbers)
    static final private byte NBITS_SHORT_REPRESENTATION = 16; // the number of bits used in the short version to represent the bit set (necessary for the two's complement representation of negative numbers)

    // Memoized table with the cumulative number of combinations for each DNA context length.
    // NOTE(review): lazily filled, idempotent writes; assumed single-threaded use -- confirm if shared across threads.
    static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1];

    /**
     * Creates a long out of a bitset
     *
     * @param bitSet the bitset
     * @return a long from the bitset representation
     */
    public static long longFrom(final BitSet bitSet) {
        return longFrom(bitSet, NBITS_LONG_REPRESENTATION);
    }

    /**
     * Creates a short integer from a bitset
     *
     * @param bitSet the bitset
     * @return a short from the bitset representation
     */
    public static short shortFrom(final BitSet bitSet) {
        return (short) longFrom(bitSet, NBITS_SHORT_REPRESENTATION);
    }

    /**
     * Creates an integer with any number of bits (up to 64 -- long precision) from a bitset
     *
     * @param bitSet the bitset
     * @param nBits  the number of bits to be used for this representation
     * @return an integer with nBits from the bitset representation
     */
    public static long longFrom(final BitSet bitSet, final int nBits) {
        long number = 0;
        // bugfix: bit indices for an nBits-wide value run [0, nBits-1]; the previous condition
        // (bitIndex <= nBits) read one bit too many, and for nBits == 64 the out-of-range
        // shift (1L << 64) wraps around to bit 0, corrupting the result.
        for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0 && bitIndex < nBits; bitIndex = bitSet.nextSetBit(bitIndex + 1))
            number |= 1L << bitIndex;

        return number;
    }

    /**
     * Creates a BitSet representation of a given long
     *
     * @param number the number to turn into a bitset
     * @return a bitset representation of the long
     */
    public static BitSet bitSetFrom(long number) {
        return bitSetFrom(number, NBITS_LONG_REPRESENTATION);
    }

    /**
     * Creates a BitSet representation of a given short
     *
     * @param number the number to turn into a bitset
     * @return a bitset representation of the short
     */
    public static BitSet bitSetFrom(short number) {
        return bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
    }

    /**
     * Creates a BitSet representation of an arbitrary integer (number of bits capped at 64 -- long precision)
     *
     * Negative numbers are stored in two's complement form: the magnitude bits are collected
     * first, then every bit above the lowest set bit is flipped up to nBits.
     *
     * @param number the number to turn into a bitset
     * @param nBits  the number of bits to use as precision for this conversion
     * @return a bitset representation of the integer
     */
    public static BitSet bitSetFrom(long number, int nBits) {
        BitSet bitSet = new BitSet();
        boolean isNegative = number < 0;
        int bitIndex = 0;
        // collect the magnitude bits (for negative input, Java's % yields negative remainders,
        // so the != 0 test still flags odd values correctly)
        while (number != 0) {
            if (number % 2 != 0)
                bitSet.set(bitIndex);
            bitIndex++;
            number /= 2;
        }
        if (isNegative) {
            boolean foundFirstSetBit = false;
            for (int i = bitSet.nextSetBit(0); i < nBits && i >= 0; i++) {
                boolean bit = bitSet.get(i);
                if (!foundFirstSetBit && bit)
                    foundFirstSetBit = true; // maintain all bits until the first 1 is found (inclusive)
                else if (foundFirstSetBit)
                    bitSet.flip(i); // flip every other bit up to nBits (two's complement)
            }
        }
        return bitSet;
    }

    /**
     * Converts a BitSet into the dna string representation.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
     * base_10 representation of the sequence. This is important for us to know how to bring the number
     * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
     * as 0's and leading 0's are omitted).
     *
     * quasi-canonical because A is represented by a 0, therefore,
     * instead of : 0, 1, 2, 3, 10, 11, 12, ...
     * we have    : 0, 1, 2, 3, 00, 01, 02, ...
     *
     * but we can correctly decode it because we know the final length.
     *
     * @param bitSet the bitset representation of the dna sequence
     * @return the dna sequence represented by the bitset
     * @throws ReviewedStingException if the bitset decodes to a negative number (possible overflow)
     */
    public static String dnaFrom(final BitSet bitSet) {
        long number = longFrom(bitSet); // the base_10 representation of the bit set
        if (number < 0)
            throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");

        final int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
        number -= combinationsFor(length - 1); // subtract the number of combinations of the preceding context from the number to get to the quasi-canonical representation

        final StringBuilder dna = new StringBuilder(length);
        while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
            final byte base = (byte) (number % 4);
            switch (base) {
                case 0:
                    dna.insert(0, 'A');
                    break;
                case 1:
                    dna.insert(0, 'C');
                    break;
                case 2:
                    dna.insert(0, 'G');
                    break;
                case 3:
                    dna.insert(0, 'T');
                    break;
            }
            number /= 4;
        }
        while (dna.length() < length)
            dna.insert(0, 'A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)

        return dna.toString();
    }

    /**
     * Creates a BitSet representation of a given dna string.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * The bit representation of a dna string is the simple:
     * 0 A     4 AA    8 CA
     * 1 C     5 AC    ...
     * 2 G     6 AG    1343 TTGGT
     * 3 T     7 AT    1364 TTTTT
     *
     * To convert from dna to number, we convert the dna string to base10 and add all combinations that
     * preceded the string (with smaller lengths).
     *
     * @param dna the dna sequence
     * @return the bitset representing the dna sequence
     * @throws ReviewedStingException if the sequence is longer than MAX_DNA_CONTEXT bases
     */
    public static BitSet bitSetFrom(String dna) {
        if (dna.length() > MAX_DNA_CONTEXT)
            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length()));

        long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
        long preContext = combinationsFor(dna.length() - 1); // the sum of all combinations that preceded the length of the dna string
        for (int i = 0; i < dna.length(); i++) {
            baseTen *= 4;
            // NOTE: any character other than A/C/G/T is silently treated like 'A' (adds 0) -- preserved behavior
            switch (dna.charAt(i)) {
                case 'A':
                    baseTen += 0;
                    break;
                case 'C':
                    baseTen += 1;
                    break;
                case 'G':
                    baseTen += 2;
                    break;
                case 'T':
                    baseTen += 3;
                    break;
            }
        }
        return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
    }

    /**
     * Calculates the number of bits necessary to represent a given number of elements
     *
     * @param numberOfElements the number of elements to represent (must be positive)
     * @return the number of bits necessary to represent this many elements
     * @throws ReviewedStingException if numberOfElements is negative
     */
    public static int numberOfBitsToRepresent(long numberOfElements) {
        if (numberOfElements < 0)
            throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);

        if (numberOfElements == 1L)
            return 1; // special case: ceil(log2(1)) would be 0 bits, but one element still needs a bit

        int n = 0;
        numberOfElements--; // values range [0, numberOfElements-1], so represent numberOfElements-1
        while (numberOfElements > 0) {
            numberOfElements = numberOfElements >> 1;
            n++;
        }
        return n;
    }

    /**
     * Calculates the length of the DNA context for a given base 10 number
     *
     * It is important to know the length given the base 10 number to calculate the number of combinations
     * and to disambiguate the "quasi-canonical" state.
     *
     * This method also calculates the number of combinations as a by-product, but since it memoizes the
     * results, a subsequent call to combinationsFor(length) is O(1).
     *
     * @param number the base 10 representation of the bitset
     * @return the length of the DNA context represented by this number
     */
    private static int contextLengthFor(long number) {
        int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
        long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
        while (combinations <= number) { // find the length of the dna string (length)
            length++;
            combinations = combinationsFor(length); // calculate the next context
        }
        return length;
    }

    /**
     * The sum of all combinations of a context of a given length from length = 0 to length.
     *
     * Memoized implementation of sum(4^i), where i=[0,length]
     *
     * @param length the length of the DNA context
     * @return the sum of all combinations leading up to this context length.
     * @throws ReviewedStingException if length exceeds MAX_DNA_CONTEXT
     */
    private static long combinationsFor(int length) {
        if (length > MAX_DNA_CONTEXT)
            throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));

        // only calculate the number of combinations if the table hasn't already cached the value
        // (length <= 0 falls through to the zero-initialized table entry; index 0 is always 0)
        if (length > 0 && combinationsPerLength[length] == 0) {
            long combinations = 0L;
            for (int i = 1; i <= length; i++)
                combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
            combinationsPerLength[length] = combinations;
        }
        return combinationsPerLength[length];
    }

    /**
     * Serializes an object and returns its byte representation.
     *
     * NOTE(review): despite the name, this returns the serialized bytes themselves, not just a
     * size; callers presumably take .length of the result -- confirm before renaming.
     *
     * @param obj the object to serialize (must implement Serializable)
     * @return the serialized byte array of the object
     * @throws java.io.IOException if serialization fails
     */
    public static byte[] sizeOf(Object obj) throws java.io.IOException {
        ByteArrayOutputStream byteObject = new ByteArrayOutputStream();
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteObject);
        objectOutputStream.writeObject(obj);
        objectOutputStream.flush();
        objectOutputStream.close();
        byteObject.close();

        return byteObject.toByteArray();
    }
}

View File

@ -29,7 +29,6 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.math.BigDecimal;
@ -1527,124 +1526,4 @@ public class MathUtils {
}
/**
* Creates an integer out of a bitset
*
* @param bitSet the bitset
* @return an integer with the bitset representation
*/
public static long intFrom(final BitSet bitSet) {
long number = 0;
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
number |= 1L << bitIndex;
return number;
}
/**
* Creates a BitSet representation of a given integer
*
* @param number the number to turn into a bitset
* @return a bitset representation of the integer
*/
public static BitSet bitSetFrom(long number) {
BitSet bitSet = new BitSet();
int bitIndex = 0;
while (number > 0) {
if (number%2 > 0)
bitSet.set(bitIndex);
bitIndex++;
number /= 2;
}
return bitSet;
}
/**
* Converts a BitSet into the dna string representation.
*
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
* a bitSetFrom(BigNumber) method.
*
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
* base_10 representation of the sequence. This is important for us to know how to bring the number
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
* as 0's and leading 0's are omitted).
*
* quasi-canonical because A is represented by a 0, therefore,
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
* we have : 0, 1, 2, 3, 00, 01, 02, ...
*
* but we can correctly decode it because we know the final length.
*
* @param bitSet the bitset representation of the dna sequence
* @return the dna sequence represented by the bitset
*/
public static String dnaFrom(final BitSet bitSet) {
long number = intFrom(bitSet); // the base_10 representation of the bit set
long preContext = 0; // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later)
long nextContext = 4; // the next context (we advance it so we know which one was preceding it).
int i = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
while (nextContext <= number) { // find the length of the dna string (i)
preContext = nextContext; // keep track of the number of combinations in the preceding context
nextContext += Math.pow(4, ++i);// calculate the next context
}
number -= preContext; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
String dna = "";
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
byte base = (byte) (number % 4);
switch (base) {
case 0 : dna = "A" + dna; break;
case 1 : dna = "C" + dna; break;
case 2 : dna = "G" + dna; break;
case 3 : dna = "T" + dna; break;
}
number /= 4;
}
for (int j = dna.length(); j < i; j++)
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
return dna;
}
/**
* Creates a BitSet representation of a given dna string.
*
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
* a bitSetFrom(BigNumber) method.
*
* The bit representation of a dna string is the simple:
* 0 A 4 AA 8 CA
* 1 C 5 AC ...
* 2 G 6 AG 1343 TTGGT
* 3 T 7 AT 1364 TTTTT
*
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
* preceded the string (with smaller lengths).
*
* @param dna the dna sequence
* @return the bitset representing the dna sequence
*/
public static BitSet bitSetFrom(String dna) {
if (dna.length() > 31)
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length()));
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
long preContext = 0; // the sum of all combinations that preceded the length of the dna string
for (int i=0; i<dna.length(); i++) {
baseTen *= 4;
switch(dna.charAt(i)) {
case 'A': baseTen += 0; break;
case 'C': baseTen += 1; break;
case 'G': baseTen += 2; break;
case 'T': baseTen += 3; break;
}
if (i>0)
preContext += Math.pow(4, i); // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...)
}
return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
}
}

View File

@ -43,7 +43,7 @@ import java.util.regex.Pattern;
/**
* Utility methods to facilitate on-the-fly base quality score recalibration.
*
*
* User: rpoplin
* Date: 2/4/12
*/
@ -58,7 +58,7 @@ public class BaseRecalibration {
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values.
public BaseRecalibration( final File RECAL_FILE ) {
public BaseRecalibration(final File RECAL_FILE) {
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
@ -68,27 +68,29 @@ public class BaseRecalibration {
// Read in the data from the csv file and populate the data map and covariates list
boolean sawEOF = false;
try {
for ( String line : new XReadLines(RECAL_FILE) ) {
for (String line : new XReadLines(RECAL_FILE)) {
lineNumber++;
if ( EOF_MARKER.equals(line) ) {
if (EOF_MARKER.equals(line)) {
sawEOF = true;
} else if( COMMENT_PATTERN.matcher(line).matches() ) {
}
else if (COMMENT_PATTERN.matcher(line).matches()) {
; // Skip over the comment lines, (which start with '#')
}
// Read in the covariates that were used from the input file
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
if( foundAllCovariates ) {
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
} else { // Found the covariate list in input file, loop through all of them and instantiate them
else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
if (foundAllCovariates) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
}
else { // Found the covariate list in input file, loop through all of them and instantiate them
String[] vals = line.split(",");
for( int iii = 0; iii < vals.length - 4; iii++ ) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
for (int iii = 0; iii < vals.length - 4; iii++) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
boolean foundClass = false;
for( Class<?> covClass : classes ) {
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
for (Class<?> covClass : classes) {
if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
foundClass = true;
try {
Covariate covariate = (Covariate)covClass.newInstance();
requestedCovariates.add( covariate );
Covariate covariate = (Covariate) covClass.newInstance();
requestedCovariates.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
@ -96,63 +98,65 @@ public class BaseRecalibration {
}
}
if( !foundClass ) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
if (!foundClass) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option.");
}
}
}
} else { // Found a line of data
if( !foundAllCovariates ) {
}
else { // Found a line of data
if (!foundAllCovariates) {
foundAllCovariates = true;
// At this point all the covariates should have been found and initialized
if( requestedCovariates.size() < 2 ) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
if (requestedCovariates.size() < 2) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
}
final boolean createCollapsedTables = true;
// Initialize any covariate member variables using the shared argument collection
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
for( Covariate cov : requestedCovariates ) {
cov.initialize( RAC );
for (Covariate cov : requestedCovariates) {
cov.initialize(RAC);
}
// Initialize the data hashMaps
dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size());
}
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
}
}
} catch ( FileNotFoundException e ) {
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
} catch ( NumberFormatException e ) {
} catch (NumberFormatException e) {
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
}
if ( !sawEOF ) {
if (!sawEOF) {
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
}
if( dataManager == null ) {
if (dataManager == null) {
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
}
dataManager.generateEmpiricalQualities( 1, MAX_QUALITY_SCORE );
dataManager.generateEmpiricalQualities(1, MAX_QUALITY_SCORE);
}
/**
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
*
* @param line A line of CSV data read from the recalibration table data file
*/
private void addCSVData(final File file, final String line) {
final String[] vals = line.split(",");
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
if( vals.length != requestedCovariates.size() + 4 ) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
if (vals.length != requestedCovariates.size() + 4) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
}
@ -160,48 +164,48 @@ public class BaseRecalibration {
final Object[] key = new Object[requestedCovariates.size()];
Covariate cov;
int iii;
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
cov = requestedCovariates.get( iii );
key[iii] = cov.getValue( vals[iii] );
for (iii = 0; iii < requestedCovariates.size(); iii++) {
cov = requestedCovariates.get(iii);
key[iii] = cov.getValue(vals[iii]);
}
final String modelString = vals[iii++];
final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.getErrorModelFromString(modelString);
final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.errorModelFrom(modelString);
// Create a new datum using the number of observations, number of mismatches, and reported quality score
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
// Add that datum to all the collapsed tables which will be used in the sequential calculation
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
dataManager.addToAllTables(key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
}
public void recalibrateRead( final GATKSAMRecord read ) {
public void recalibrateRead(final GATKSAMRecord read) {
//compute all covariate values for this read
RecalDataManager.computeCovariates(read, requestedCovariates);
final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
final CovariateKeySet covariateKeySet = RecalDataManager.covariateKeySetFrom(read);
for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
final byte[] originalQuals = read.getBaseQualities( errorModel );
for (final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values()) {
final byte[] originalQuals = read.getBaseQualities(errorModel);
final byte[] recalQuals = originalQuals.clone();
// For each base in the read
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
for (int offset = 0; offset < read.getReadLength(); offset++) {
final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length - 1); // need to strip off the error mode which was appended to the list of covariates
// BUGBUG: This caching seems to put the entire key set into memory which negates the benefits of storing the delta delta tables?
//Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
//if( qualityScore == null ) {
final byte qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
final byte qualityScore = performSequentialQualityCalculation(errorModel, fullCovariateKey);
// qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
//}
recalQuals[offset] = qualityScore;
}
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
read.setBaseQualities( recalQuals, errorModel );
preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low
read.setBaseQualities(recalQuals, errorModel);
}
}
@ -211,27 +215,28 @@ public class BaseRecalibration {
*
* Given the full recalibration table, we perform the following preprocessing steps:
*
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
* @param key The list of Comparables that were calculated from the covariates
* @return A recalibrated quality score as a byte
*/
private byte performSequentialQualityCalculation( final RecalDataManager.BaseRecalibrationType errorModel, final Object... key ) {
private byte performSequentialQualityCalculation(final RecalDataManager.BaseRecalibrationType errorModel, final Object... key) {
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] qualityScoreCollapsedKey = new Object[2];
final Object[] covariateCollapsedKey = new Object[3];
// The global quality shift (over the read group only)
readGroupCollapsedKey[0] = key[0];
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0, errorModel).get( readGroupCollapsedKey ));
final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0, errorModel).get(readGroupCollapsedKey));
double globalDeltaQ = 0.0;
if( globalRecalDatum != null ) {
if (globalRecalDatum != null) {
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
@ -240,9 +245,9 @@ public class BaseRecalibration {
// The shift in quality between reported and empirical
qualityScoreCollapsedKey[0] = key[0];
qualityScoreCollapsedKey[1] = key[1];
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1, errorModel).get( qualityScoreCollapsedKey ));
final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1, errorModel).get(qualityScoreCollapsedKey));
double deltaQReported = 0.0;
if( qReportedRecalDatum != null ) {
if (qReportedRecalDatum != null) {
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
}
@ -252,27 +257,28 @@ public class BaseRecalibration {
double deltaQCovariateEmpirical;
covariateCollapsedKey[0] = key[0];
covariateCollapsedKey[1] = key[1];
for( int iii = 2; iii < key.length; iii++ ) {
covariateCollapsedKey[2] = key[iii]; // The given covariate
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii, errorModel).get( covariateCollapsedKey ));
if( covariateRecalDatum != null ) {
for (int iii = 2; iii < key.length; iii++) {
covariateCollapsedKey[2] = key[iii]; // The given covariate
final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii, errorModel).get(covariateCollapsedKey));
if (covariateRecalDatum != null) {
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
}
}
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
}
/**
* Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
*
* @param originalQuals The list of original base quality scores
* @param recalQuals A list of the new recalibrated quality scores
* @param recalQuals A list of the new recalibrated quality scores
*/
private void preserveQScores( final byte[] originalQuals, final byte[] recalQuals ) {
for( int iii = 0; iii < recalQuals.length; iii++ ) {
if( originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) {
for (int iii = 0; iii < recalQuals.length; iii++) {
if (originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
recalQuals[iii] = originalQuals[iii];
}
}

View File

@ -213,8 +213,8 @@ public class GATKSAMRecord extends BAMRecord {
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) );
if( quals == null ) {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
}
return quals;

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
@ -495,7 +496,7 @@ public class ReadUtils {
/**
* Is a base inside a read?
*
* @param read the read to evaluate
* @param read the read to evaluate
* @param referenceCoordinate the reference coordinate of the base to test
* @return true if it is inside the read, false otherwise.
*/
@ -541,9 +542,9 @@ public class ReadUtils {
*
* See getCoverageDistributionOfRead for information on how the coverage is calculated.
*
* @param list the list of reads covering the region
* @param list the list of reads covering the region
* @param startLocation the first reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @return an array with the coverage of each position from startLocation to stopLocation
*/
public static int [] getCoverageDistributionOfReads(List<GATKSAMRecord> list, int startLocation, int stopLocation) {
@ -563,9 +564,9 @@ public class ReadUtils {
* Note: This function counts DELETIONS as coverage (since the main purpose is to downsample
* reads for variant regions, and deletions count as variants)
*
* @param read the read to get the coverage distribution of
* @param read the read to get the coverage distribution of
* @param startLocation the first reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @return an array with the coverage of each position from startLocation to stopLocation
*/
public static int [] getCoverageDistributionOfRead(GATKSAMRecord read, int startLocation, int stopLocation) {
@ -611,9 +612,9 @@ public class ReadUtils {
* Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with value==true meaning it contributes to the coverage.
* Example: Read => {true, true, false, ... false}
*
* @param readList the list of reads to generate the association mappings
* @param readList the list of reads to generate the association mappings
* @param startLocation the first reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @return the two hashmaps described above
*/
public static Pair<HashMap<Integer, HashSet<GATKSAMRecord>> , HashMap<GATKSAMRecord, Boolean[]>> getBothReadToLociMappings (List<GATKSAMRecord> readList, int startLocation, int stopLocation) {
@ -622,7 +623,6 @@ public class ReadUtils {
HashMap<Integer, HashSet<GATKSAMRecord>> locusToReadMap = new HashMap<Integer, HashSet<GATKSAMRecord>>(2*(stopLocation - startLocation + 1), 0.5f);
HashMap<GATKSAMRecord, Boolean[]> readToLocusMap = new HashMap<GATKSAMRecord, Boolean[]>(2*readList.size(), 0.5f);
for (int i = startLocation; i <= stopLocation; i++)
locusToReadMap.put(i, new HashSet<GATKSAMRecord>()); // Initialize the locusToRead map with empty lists
@ -631,7 +631,7 @@ public class ReadUtils {
int [] readCoverage = getCoverageDistributionOfRead(read, startLocation, stopLocation);
for (int i=0; i<readCoverage.length; i++) {
for (int i = 0; i < readCoverage.length; i++) {
int refLocation = i + startLocation;
if (readCoverage[i] > 0) {
// Update the hash for this locus
@ -649,6 +649,55 @@ public class ReadUtils {
return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
}
/**
* Create random read qualities
*
* @param length the length of the read
* @return an array with randomized base qualities between 0 and 50
*/
public static byte[] createRandomReadQuals(int length) {
Random random = GenomeAnalysisEngine.getRandomGenerator();
byte[] quals = new byte[length];
for (int i = 0; i < length; i++)
quals[i] = (byte) random.nextInt(50);
return quals;
}
/**
* Create random read qualities
*
* @param length the length of the read
* @param allowNs whether or not to allow N's in the read
* @return an array with randomized bases (A-N) with equal probability
*/
public static byte[] createRandomReadBases(int length, boolean allowNs) {
Random random = GenomeAnalysisEngine.getRandomGenerator();
int numberOfBases = allowNs ? 5 : 4;
byte[] bases = new byte[length];
for (int i = 0; i < length; i++) {
switch (random.nextInt(numberOfBases)) {
case 0:
bases[i] = 'A';
break;
case 1:
bases[i] = 'C';
break;
case 2:
bases[i] = 'G';
break;
case 3:
bases[i] = 'T';
break;
case 4:
bases[i] = 'N';
break;
default:
throw new ReviewedStingException("Something went wrong, this is just impossible");
}
}
return bases;
}
public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
int sequenceRecordIndex = 0;
@ -656,4 +705,5 @@ public class ReadUtils {
sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
return Arrays.deepToString(sequenceRecordNames);
}
}

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -12,37 +12,13 @@ import java.util.BitSet;
import java.util.Random;
/**
* Short one line description of the walker.
*
* <p>
* [Long description of the walker]
* </p>
*
*
* <h2>Input</h2>
* <p>
* [Description of the Input]
* </p>
*
* <h2>Output</h2>
* <p>
* [Description of the Output]
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T [walker name]
* </pre>
*
* @author Mauricio Carneiro
* @since 3/1/12
*/
public class ContextCovariateUnitTest {
ContextCovariate covariate;
RecalibrationArgumentCollection RAC;
Random random;
Random random;
@BeforeClass
public void init() {
@ -55,49 +31,35 @@ public class ContextCovariateUnitTest {
@Test(enabled = true)
public void testSimpleContexts() {
byte [] quals = createRandomReadQuals(101);
byte [] bbases = createRandomReadBases(101);
byte[] quals = ReadUtils.createRandomReadQuals(10000);
byte[] bbases = ReadUtils.createRandomReadBases(10000, true);
String bases = stringFrom(bbases);
// System.out.println("Read: " + bases);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
CovariateValues values = covariate.getValues(read);
verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
verifyCovariateArray((BitSet []) values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
}
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
for (int i=0; i<values.length; i++) {
if (i >= contextSize)
Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i));
else
Assert.assertNull(values[i]);
for (int i = 0; i < values.length; i++) {
String expectedContext = covariate.NO_CONTEXT_VALUE;
if (i >= contextSize) {
String context = bases.substring(i - contextSize, i);
if (!context.contains("N"))
expectedContext = context;
}
// System.out.println(String.format("Context [%d]:\n%s\n%s\n", i, covariate.keyFromBitSet(values[i]), expectedContext));
Assert.assertEquals(covariate.keyFromBitSet(values[i]), expectedContext);
}
}
private String stringFrom(byte [] array) {
private String stringFrom(byte[] array) {
String s = "";
for (byte value : array)
s += (char) value;
return s;
}
private byte [] createRandomReadQuals(int length) {
byte [] quals = new byte[length];
for (int i=0; i<length; i++)
quals[i] = (byte) random.nextInt(50);
return quals;
}
private byte [] createRandomReadBases(int length) {
byte [] bases = new byte[length];
for (int i=0; i<length; i++) {
switch(random.nextInt(4)) {
case 0: bases[i] = 'A'; break;
case 1: bases[i] = 'C'; break;
case 2: bases[i] = 'G'; break;
case 3: bases[i] = 'T'; break;
}
}
return bases;
}
}

View File

@ -0,0 +1,68 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.BitSet;
import java.util.Random;
/**
* @author Mauricio Carneiro
* @since 3/1/12
*/
public class CycleCovariateUnitTest {
CycleCovariate covariate;
RecalibrationArgumentCollection RAC;
Random random;
@BeforeClass
public void init() {
RAC = new RecalibrationArgumentCollection();
covariate = new CycleCovariate();
random = GenomeAnalysisEngine.getRandomGenerator();
covariate.initialize(RAC);
}
@Test(enabled = true)
public void testSimpleCycles() {
short readLength = 10;
byte[] quals = ReadUtils.createRandomReadQuals(readLength);
byte[] bbases = ReadUtils.createRandomReadBases(readLength, true);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
read.getReadGroup().setPlatform("illumina");
CovariateValues values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) 1, (short) 1);
read.setReadNegativeStrandFlag(true);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), readLength, (short) -1);
read.setReadPairedFlag(true);
read.setSecondOfPairFlag(true);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -readLength, (short) 1);
read.setReadNegativeStrandFlag(false);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1);
}
private void verifyCovariateArray(BitSet[] values, short init, short increment) {
for (short i = 0; i < values.length; i++) {
short actual = Short.decode(covariate.keyFromBitSet(values[i]));
int expected = init + (increment * i);
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
Assert.assertEquals(actual, expected);
}
}
}

View File

@ -0,0 +1,75 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.Random;
/**
* @author Mauricio Carneiro
* @since 3/5/12
*/
public class BitSetUtilsUnitTest {
private static int RANDOM_NUMBERS_TO_TRY = 87380;
private static Random random;
@BeforeClass
public void init() {
random = GenomeAnalysisEngine.getRandomGenerator();
}
@Test(enabled = true)
public void testLongBitSet() {
long[] numbers = {0L, 1L, 428L, 65536L, 239847L, 4611686018427387903L, Long.MAX_VALUE, Long.MIN_VALUE, -1L, -2L, -7L, -128L, -65536L, -100000L};
for (long n : numbers)
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n);
for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
long n = random.nextLong();
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n); // Because class Random uses a seed with only 48 bits, this algorithm will not return all possible long values.
}
}
@Test(enabled = true)
public void testShortBitSet() {
short[] numbers = {0, 1, 428, 25934, 23847, 16168, Short.MAX_VALUE, Short.MIN_VALUE, -1, -2, -7, -128, -12312, -31432};
for (long n : numbers)
Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
short n = (short) random.nextInt();
Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
}
}
@Test(enabled = true)
public void testDNAAndBitSetConversion() {
String[] dna = {"AGGTGTTGT", "CCCCCCCCCCCCCC", "GGGGGGGGGGGGGG", "TTTTTTTTTTTTTT", "GTAGACCGATCTCAGCTAGT", "AACGTCAATGCAGTCAAGTCAGACGTGGGTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"};
// Test all contexts of size 1-8.
for (long n = 0; n < RANDOM_NUMBERS_TO_TRY; n++)
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(n)))), n);
// Test the special cases listed in the dna array
for (String d : dna)
Assert.assertEquals(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(d)), d);
}
@Test(enabled = true)
public void testNumberOfBitsToRepresent() {
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(0), 0); // Make sure 0 elements need 0 bits to be represented
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(1), 1); // Make sure 1 element needs 1 bit to be represented
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(3), 2); // Make sure 3 elements need 2 bit to be represented
for (int i = 1; i < 63; i++) { // Can't test i == 63 because n1 is a negative number
long n1 = 1L << i;
long n2 = Math.abs(random.nextLong()) % n1;
long n3 = n1 | n2;
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n3), (n3 == n1) ? i : i + 1);
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n1), i);
}
}
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -131,7 +130,8 @@ public class MathUtilsUnitTest extends BaseTest {
int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
MathUtils.RunningAverage r = new MathUtils.RunningAverage();
for (int i = 0; i < numbers.length; i++) r.add((double) numbers[i]);
for (int i = 0; i < numbers.length; i++)
r.add((double) numbers[i]);
Assert.assertEquals((long) numbers.length, r.observationCount());
Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
@ -223,35 +223,6 @@ public class MathUtilsUnitTest extends BaseTest {
return set.isEmpty();
}
@Test(enabled = true)
public void testIntAndBitSetConversion() {
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)), 239847);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)), 12726);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)), 0);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)), 1);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)), 65536);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE);
}
@Test(enabled = true)
public void testDNAAndBitSetConversion() {
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")), "ACGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")), "AGGTGTTGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")), "A");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")), "C");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")), "G");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")), "T");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")), "CC");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")), "AA");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")), "AAAA");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")), "CCCCCCCCCCCCCC");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")), "GGGGGGGGGGGGGG");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")), "TTTTTTTTTTTTTT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")), "GTAGACCGATCTCAGCTAGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31)
}
@Test
public void testApproximateLog10SumLog10() {
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
@ -267,54 +238,54 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
}
@Test
public void testNormalizeFromLog10() {
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[] {0.0, 0.0, -1.0, -1.1, -7.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[] {0.0, 0.0, 0.0, -0.1, -6.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[] {-8.9, -6.7, -9.4, 0.0, -8.9}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -1.0, -1.0, -1.0}), new double[] {0.25, 0.25, 0.25, 0.25}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -1.0}), new double[] {0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
}
/**
* Private function used by testNormalizeFromLog10()
*/
private boolean compareDoubleArrays(double[] b1, double[] b2) {
if( b1.length != b2.length ) {
if (b1.length != b2.length) {
return false; // sanity check
}
for( int i=0; i < b1.length; i++ ){
if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 )
for (int i = 0; i < b1.length; i++) {
if (MathUtils.compareDoubles(b1[i], b2[i]) != 0)
return false;
}
return true;