BitSet keys to lower BQSR's memory footprint
Infrastructure:
* Generic BitSet implementation with any precision (up to long).
* Two's complement implementation of the bit set handles negative numbers (cycle covariate).
* Memoized implementation of the BitSet utils for better performance.
* All exponents are now calculated with bit shifts, fixing numerical precision issues with the double Math.pow.
* Replaced log/sqrt with bitwise logic to get rid of numerical issues.

BQSR:
* All covariates output BitSets and have the functionality to decode them back into Object values.
* Covariates are responsible for determining the size of the key they will use (number of bits).
* Generalized KeyManager implementation combines an arbitrary number of covariates into one BitSet key with the event type.
* No more NestedHashMaps. The single-key system now fits in one hash table, reducing the overhead of hash table objects.

Tests:
* Unit tests added to every method of BitSetUtils.
* Unit tests added to the generalized key system infrastructure of BQSRv2 (KeyManager).
* Unit tests added to the cycle and context covariates (unit tests for all covariates will follow).
This commit is contained in:
parent
e86ce8f3d6
commit
ca11ab39e7
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -43,7 +43,10 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
private int mismatchesContextSize;
|
||||
private int insertionsContextSize;
|
||||
private int deletionsContextSize;
|
||||
private int deletionsContextSize;
|
||||
|
||||
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
|
||||
protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
|
|
@ -62,7 +65,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
int l = read.getReadLength();
|
||||
BitSet[] mismatches = new BitSet[l];
|
||||
BitSet[] insertions = new BitSet[l];
|
||||
BitSet[] deletions = new BitSet[l];
|
||||
BitSet[] deletions = new BitSet[l];
|
||||
|
||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||
byte[] bases = read.getReadBases();
|
||||
|
|
@ -72,7 +75,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
|
||||
insertions[i] = contextWith(bases, i, insertionsContextSize);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
||||
}
|
||||
|
||||
if (negativeStrand) {
|
||||
|
|
@ -89,24 +92,35 @@ public class ContextCovariate implements StandardCovariate {
|
|||
return str;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
if (key.equals(NO_CONTEXT_BITSET))
|
||||
return NO_CONTEXT_VALUE;
|
||||
return BitSetUtils.dnaFrom(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return Long.bitCount(-1L);
|
||||
}
|
||||
|
||||
/**
|
||||
* calculates the context of a base independent of the covariate mode
|
||||
* calculates the context of a base independent of the covariate mode (mismatch, insertion or deletion)
|
||||
*
|
||||
* @param bases the bases in the read to build the context from
|
||||
* @param offset the position in the read to calculate the context for
|
||||
* @param contextSize context size to use building the context
|
||||
* @return
|
||||
* @param bases the bases in the read to build the context from
|
||||
* @param offset the position in the read to calculate the context for
|
||||
* @param contextSize context size to use building the context
|
||||
* @return the bitSet representing the Context
|
||||
*/
|
||||
private BitSet contextWith(byte [] bases, int offset, int contextSize) {
|
||||
if (offset < contextSize)
|
||||
return null;
|
||||
|
||||
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
||||
if (context.contains("N"))
|
||||
return null;
|
||||
|
||||
return MathUtils.bitSetFrom(context);
|
||||
}
|
||||
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
|
||||
BitSet result = NO_CONTEXT_BITSET;
|
||||
if (offset >= contextSize) {
|
||||
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
||||
if (!context.contains("N"))
|
||||
result = BitSetUtils.bitSetFrom(context);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverses the given array in place.
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -53,7 +55,29 @@ public interface Covariate {
|
|||
*/
|
||||
public CovariateValues getValues(GATKSAMRecord read);
|
||||
|
||||
public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
/**
|
||||
* Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
*
|
||||
* @param str the key in string type (read from the csv)
|
||||
* @return the key in its correct type.
|
||||
*/
|
||||
public Object getValue(String str);
|
||||
|
||||
/**
|
||||
* Converts the bitset representation of the key (used internally for table indexing) to String format for file output.
|
||||
*
|
||||
* @param key the bitset representation of the key
|
||||
* @return a string representation of the key
|
||||
*/
|
||||
public String keyFromBitSet(BitSet key);
|
||||
|
||||
/**
|
||||
* Each covariate should determine how many bits are necessary to encode its data
|
||||
*
|
||||
* @return The number of bits used to represent the values of this covariate.
|
||||
*/
|
||||
public int numberOfBits();
|
||||
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {}
|
||||
|
|
|
|||
|
|
@ -2,87 +2,107 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of it's covariates.
|
||||
*
|
||||
* The object temporarily held by a read that describes all of its covariates.
|
||||
*
|
||||
* In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateKeySet {
|
||||
private Object[][] mismatchesKeySet;
|
||||
private Object[][] insertionsKeySet;
|
||||
private Object[][] deletionsKeySet;
|
||||
private BitSet[][] mismatchesKeySet;
|
||||
private BitSet[][] insertionsKeySet;
|
||||
private BitSet[][] deletionsKeySet;
|
||||
|
||||
private int nextCovariateIndex;
|
||||
|
||||
private static String mismatchesCovariateName = "M";
|
||||
private static String insertionsCovariateName = "I";
|
||||
private static String deletionsCovariateName = "D";
|
||||
|
||||
// private static String mismatchesCovariateName = "M";
|
||||
// private static String insertionsCovariateName = "I";
|
||||
// private static String deletionsCovariateName = "D";
|
||||
//
|
||||
// private static BitSet mismatchesCovariateBitSet = BitSetUtils.bitSetFrom(0);
|
||||
// private static BitSet insertionsCovariateBitSet = BitSetUtils.bitSetFrom(1);
|
||||
// private static BitSet deletionsCovariateBitSet = BitSetUtils.bitSetFrom(2);
|
||||
|
||||
private static HashMap<String, RecalDataManager.BaseRecalibrationType> nameToType = new HashMap<String, RecalDataManager.BaseRecalibrationType>();
|
||||
private static HashMap<BitSet, String> bitSetToName = new HashMap<BitSet, String>();
|
||||
|
||||
public CovariateKeySet(int readLength, int numberOfCovariates) {
|
||||
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
|
||||
this.mismatchesKeySet = new Object[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new Object[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new Object[readLength][numberOfCovariates];
|
||||
initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateName);
|
||||
initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateName);
|
||||
initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateName);
|
||||
// numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
|
||||
this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
// initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateBitSet);
|
||||
// initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateBitSet);
|
||||
// initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateBitSet);
|
||||
this.nextCovariateIndex = 0;
|
||||
|
||||
// nameToType.put(mismatchesCovariateName, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION);
|
||||
// nameToType.put(insertionsCovariateName, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
|
||||
// nameToType.put(deletionsCovariateName, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
|
||||
//
|
||||
// bitSetToName.put(BitSetUtils.bitSetFrom(0), mismatchesCovariateName);
|
||||
// bitSetToName.put(BitSetUtils.bitSetFrom(1), insertionsCovariateName);
|
||||
// bitSetToName.put(BitSetUtils.bitSetFrom(2), deletionsCovariateName);
|
||||
}
|
||||
|
||||
|
||||
public void addCovariate(CovariateValues covariate) {
|
||||
transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
|
||||
transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
|
||||
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
|
||||
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
|
||||
nextCovariateIndex++;
|
||||
}
|
||||
|
||||
public static RecalDataManager.BaseRecalibrationType getErrorModelFromString(final String modelString) {
|
||||
if (modelString.equals(mismatchesCovariateName))
|
||||
return RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION;
|
||||
else if (modelString.equals(insertionsCovariateName))
|
||||
return RecalDataManager.BaseRecalibrationType.BASE_INSERTION;
|
||||
else if (modelString.equals(deletionsCovariateName))
|
||||
return RecalDataManager.BaseRecalibrationType.BASE_DELETION;
|
||||
throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
|
||||
public static RecalDataManager.BaseRecalibrationType errorModelFrom(final String modelString) {
|
||||
if (!nameToType.containsKey(modelString))
|
||||
throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
|
||||
return nameToType.get(modelString);
|
||||
}
|
||||
|
||||
public Object[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
|
||||
public static String eventNameFrom(final BitSet bitSet) {
|
||||
if (!bitSetToName.containsKey(bitSet))
|
||||
throw new ReviewedStingException("Unrecognized Event Type BitSet: " + bitSet);
|
||||
return bitSetToName.get(bitSet);
|
||||
}
|
||||
|
||||
public BitSet[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
|
||||
switch (errorModel) {
|
||||
case BASE_SUBSTITUTION:
|
||||
return getMismatchesKeySet(readPosition);
|
||||
return getMismatchesKeySet(readPosition);
|
||||
case BASE_INSERTION:
|
||||
return getInsertionsKeySet(readPosition);
|
||||
return getInsertionsKeySet(readPosition);
|
||||
case BASE_DELETION:
|
||||
return getDeletionsKeySet(readPosition);
|
||||
return getDeletionsKeySet(readPosition);
|
||||
default:
|
||||
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
|
||||
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel);
|
||||
}
|
||||
}
|
||||
|
||||
public Object[] getMismatchesKeySet(int readPosition) {
|
||||
public BitSet[] getMismatchesKeySet(int readPosition) {
|
||||
return mismatchesKeySet[readPosition];
|
||||
}
|
||||
|
||||
public Object[] getInsertionsKeySet(int readPosition) {
|
||||
public BitSet[] getInsertionsKeySet(int readPosition) {
|
||||
return insertionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
public Object[] getDeletionsKeySet(int readPosition) {
|
||||
public BitSet[] getDeletionsKeySet(int readPosition) {
|
||||
return deletionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
private void transposeCovariateValues (Object [][] keySet, Object [] covariateValues) {
|
||||
for (int i=0; i<covariateValues.length; i++)
|
||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
||||
private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
|
||||
for (int i = 0; i < covariateValues.length; i++)
|
||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
||||
}
|
||||
|
||||
private void initializeCovariateKeySet (Object[][] keySet, String covariateName) {
|
||||
|
||||
private void initializeCovariateKeySet(BitSet[][] keySet, BitSet covariateName) {
|
||||
int readLength = keySet.length;
|
||||
int lastCovariateIndex = keySet[0].length - 1;
|
||||
for (int i = 0; i < readLength; i++)
|
||||
for (int i = 0; i < readLength; i++)
|
||||
keySet[i][lastCovariateIndex] = covariateName;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* An object to hold the different covariate values for all bases in the read.
|
||||
*
|
||||
|
|
@ -12,25 +14,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
|
||||
private Object[] mismatches;
|
||||
private Object[] insertions;
|
||||
private Object[] deletions;
|
||||
private BitSet[] mismatches;
|
||||
private BitSet[] insertions;
|
||||
private BitSet[] deletions;
|
||||
|
||||
public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
|
||||
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
|
||||
this.mismatches = mismatch;
|
||||
this.insertions = insertion;
|
||||
this.deletions = deletion;
|
||||
}
|
||||
|
||||
public Object[] getMismatches() {
|
||||
public BitSet[] getMismatches() {
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
public Object[] getInsertions() {
|
||||
public BitSet[] getInsertions() {
|
||||
return insertions;
|
||||
}
|
||||
|
||||
public Object[] getDeletions() {
|
||||
public BitSet[] getDeletions() {
|
||||
return deletions;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.EnumSet;
|
||||
|
||||
/*
|
||||
|
|
@ -59,13 +61,13 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// Used to pick out the covariate's value from attributes of the read
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
Integer [] cycles = new Integer[read.getReadLength()];
|
||||
BitSet[] cycles = new BitSet[read.getReadLength()];
|
||||
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
||||
|
||||
// Discrete cycle platforms
|
||||
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
|
||||
final int init;
|
||||
final int increment;
|
||||
final short init;
|
||||
final short increment;
|
||||
if (!read.getReadNegativeStrandFlag()) {
|
||||
// Differentiate between first and second of pair.
|
||||
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
|
||||
|
|
@ -88,19 +90,19 @@ public class CycleCovariate implements StandardCovariate {
|
|||
else {
|
||||
if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
|
||||
//second of pair, negative strand
|
||||
init = -read.getReadLength();
|
||||
init = (short) -read.getReadLength();
|
||||
increment = 1;
|
||||
}
|
||||
else {
|
||||
//first of pair, negative strand
|
||||
init = read.getReadLength();
|
||||
init = (short) read.getReadLength();
|
||||
increment = -1;
|
||||
}
|
||||
}
|
||||
|
||||
int cycle = init;
|
||||
short cycle = init;
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
cycles[i] = cycle;
|
||||
cycles[i] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycle += increment;
|
||||
}
|
||||
}
|
||||
|
|
@ -119,7 +121,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// the current sequential model would consider the effects independently instead of jointly.
|
||||
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
|
||||
|
||||
int cycle = multiplyByNegative1 ? -1 : 1;
|
||||
short cycle = multiplyByNegative1 ? (short) -1 : 1;
|
||||
|
||||
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
||||
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||
|
|
@ -127,19 +129,19 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = 0;
|
||||
while (iii < readLength) {
|
||||
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii++;
|
||||
}
|
||||
if (iii < readLength) {
|
||||
|
|
@ -149,7 +151,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii++;
|
||||
}
|
||||
|
||||
|
|
@ -159,19 +161,19 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = readLength - 1;
|
||||
while (iii >= 0) {
|
||||
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii--;
|
||||
}
|
||||
if (iii >= 0) {
|
||||
|
|
@ -181,7 +183,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = cycle;
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
iii--;
|
||||
}
|
||||
}
|
||||
|
|
@ -192,7 +194,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
else {
|
||||
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
||||
}
|
||||
|
||||
|
||||
return new CovariateValues(cycles, cycles, cycles);
|
||||
}
|
||||
|
||||
|
|
@ -201,4 +203,14 @@ public class CycleCovariate implements StandardCovariate {
|
|||
public final Object getValue(final String str) {
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return String.format("%d", BitSetUtils.shortFrom(key));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -37,6 +40,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
|
||||
public class QualityScoreCovariate implements RequiredCovariate {
|
||||
|
||||
private final int MAX_QUAL = 50;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
|
|
@ -46,18 +51,18 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int readLength = read.getReadLength();
|
||||
|
||||
Integer [] mismatches = new Integer[readLength];
|
||||
Integer [] insertions = new Integer[readLength];
|
||||
Integer [] deletions = new Integer[readLength];
|
||||
BitSet[] mismatches = new BitSet[readLength];
|
||||
BitSet[] insertions = new BitSet[readLength];
|
||||
BitSet[] deletions = new BitSet[readLength];
|
||||
|
||||
byte [] baseQualities = read.getBaseQualities();
|
||||
byte [] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
byte [] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
byte[] baseQualities = read.getBaseQualities();
|
||||
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
|
||||
for (int i=0; i<baseQualities.length; i++) {
|
||||
mismatches[i] = (int) baseQualities[i];
|
||||
insertions[i] = (int) baseInsertionQualities[i];
|
||||
deletions[i] = (int) baseDeletionQualities[i];
|
||||
for (int i = 0; i < baseQualities.length; i++) {
|
||||
mismatches[i] = BitSetUtils.bitSetFrom(baseQualities[i]);
|
||||
insertions[i] = BitSetUtils.bitSetFrom(baseInsertionQualities[i]);
|
||||
deletions[i] = BitSetUtils.bitSetFrom(baseDeletionQualities[i]);
|
||||
}
|
||||
|
||||
return new CovariateValues(mismatches, insertions, deletions);
|
||||
|
|
@ -68,4 +73,14 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
public final Object getValue(final String str) {
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return String.format("%d", BitSetUtils.longFrom(key));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(MAX_QUAL);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
|
||||
/*
|
||||
|
|
@ -39,7 +41,7 @@ import java.util.HashMap;
|
|||
*/
|
||||
|
||||
public class ReadGroupCovariate implements RequiredCovariate {
|
||||
|
||||
|
||||
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
|
||||
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
|
||||
private short nextId = 0;
|
||||
|
|
@ -54,7 +56,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
final int l = read.getReadLength();
|
||||
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||
short shortId;
|
||||
if (readGroupLookupTable.containsKey(readGroupId))
|
||||
if (readGroupLookupTable.containsKey(readGroupId))
|
||||
shortId = readGroupLookupTable.get(readGroupId);
|
||||
else {
|
||||
shortId = nextId;
|
||||
|
|
@ -62,8 +64,9 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
nextId++;
|
||||
}
|
||||
Short [] readGroups = new Short[l];
|
||||
Arrays.fill(readGroups, shortId);
|
||||
BitSet rg = BitSetUtils.bitSetFrom(shortId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
|
||||
BitSet[] readGroups = new BitSet[l];
|
||||
Arrays.fill(readGroups, rg);
|
||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||
}
|
||||
|
||||
|
|
@ -72,10 +75,20 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return decodeReadGroup((short) BitSetUtils.longFrom(key));
|
||||
}
|
||||
|
||||
public final String decodeReadGroup(final short id) {
|
||||
return readGroupReverseLookupTable.get(id);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -58,18 +58,44 @@ public class RecalDataManager {
|
|||
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
||||
private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
|
||||
|
||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
||||
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
||||
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||
private static boolean warnUserNullPlatform = false;
|
||||
|
||||
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||
|
||||
public enum BaseRecalibrationType {
|
||||
BASE_SUBSTITUTION,
|
||||
BASE_INSERTION,
|
||||
BASE_DELETION
|
||||
BASE_SUBSTITUTION(0, "M"),
|
||||
BASE_INSERTION(1, "I"),
|
||||
BASE_DELETION(2, "D");
|
||||
|
||||
public int index;
|
||||
public String representation;
|
||||
|
||||
private BaseRecalibrationType(int index, String representation) {
|
||||
this.index = index;
|
||||
this.representation = representation;
|
||||
}
|
||||
|
||||
public static BaseRecalibrationType eventFrom(int index) {
|
||||
switch (index) {
|
||||
case 0:
|
||||
return BASE_SUBSTITUTION;
|
||||
case 1:
|
||||
return BASE_INSERTION;
|
||||
case 2:
|
||||
return BASE_DELETION;
|
||||
default:
|
||||
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return representation;
|
||||
}
|
||||
}
|
||||
|
||||
public enum SOLID_RECAL_MODE {
|
||||
|
|
@ -119,7 +145,7 @@ public class RecalDataManager {
|
|||
dataCollapsedReadGroup = new HashMap<BaseRecalibrationType, NestedHashMap>();
|
||||
dataCollapsedQualityScore = new HashMap<BaseRecalibrationType, NestedHashMap>();
|
||||
dataCollapsedByCovariate = new HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>>();
|
||||
for ( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
|
||||
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
|
||||
dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
|
||||
dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
|
||||
dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
|
||||
|
|
@ -136,10 +162,10 @@ public class RecalDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
public static CovariateKeySet getAllCovariateValuesFor(GATKSAMRecord read) {
|
||||
public static CovariateKeySet covariateKeySetFrom(GATKSAMRecord read) {
|
||||
return (CovariateKeySet) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add the given mapping to all of the collapsed hash tables
|
||||
*
|
||||
|
|
@ -147,7 +173,7 @@ public class RecalDataManager {
|
|||
* @param fullDatum The RecalDatum which is the data for this mapping
|
||||
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
||||
*/
|
||||
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel ) {
|
||||
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel) {
|
||||
|
||||
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
|
||||
//data.put(key, thisDatum); // add the mapping to the main table
|
||||
|
|
@ -208,7 +234,7 @@ public class RecalDataManager {
|
|||
*/
|
||||
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
|
||||
|
||||
for( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
|
||||
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
|
||||
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual);
|
||||
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual);
|
||||
for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) {
|
||||
|
|
@ -551,6 +577,7 @@ public class RecalDataManager {
|
|||
/**
|
||||
* Given the base and the color calculate the next base in the sequence
|
||||
*
|
||||
* @param read the read
|
||||
* @param prevBase The base
|
||||
* @param color The color
|
||||
* @return The next base in the sequence
|
||||
|
|
@ -615,11 +642,12 @@ public class RecalDataManager {
|
|||
* Computes all requested covariates for every offset in the given read
|
||||
* by calling covariate.getValues(..).
|
||||
*
|
||||
* It populates an array of covariate values where result[i][j] is the covariate
|
||||
* value for the ith position in the read and the jth covariate in
|
||||
* requestedCovariates list.
|
||||
*
|
||||
* @param read The read for which to compute covariate values.
|
||||
* @param requestedCovariates The list of requested covariates.
|
||||
* @return An array of covariate values where result[i][j] is the covariate
|
||||
* value for the ith position in the read and the jth covariate in
|
||||
* requestedCovariates list.
|
||||
*/
|
||||
public static void computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
|
||||
final int numRequestedCovariates = requestedCovariates.size();
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ public class RecalDatumOptimized {
|
|||
public final double empiricalQualDouble(final int smoothing, final double maxQual) {
|
||||
final double doubleMismatches = (double) (numMismatches + smoothing);
|
||||
final double doubleObservations = (double) (numObservations + smoothing);
|
||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
return Math.min(empiricalQual, maxQual);
|
||||
}
|
||||
|
||||
|
|
@ -106,9 +106,10 @@ public class RecalDatumOptimized {
|
|||
|
||||
public final byte empiricalQualByte() {
|
||||
return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
|
||||
}
|
||||
}
|
||||
|
||||
public final String outputToCSV() {
|
||||
@Override
|
||||
public final String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -92,16 +91,6 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
||||
protected boolean RUN_WITHOUT_DBSNP = false;
|
||||
|
||||
/////////////////////////////
|
||||
// protected Member Variables
|
||||
/////////////////////////////
|
||||
protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
|
||||
protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
|
||||
|
||||
protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
||||
protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
||||
|
||||
|
||||
/**
|
||||
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
|
|
@ -153,7 +142,6 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
|
||||
public byte DELETIONS_DEFAULT_QUALITY = 45;
|
||||
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String DEFAULT_PLATFORM = null;
|
||||
|
|
@ -161,5 +149,4 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,284 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Utilities for bitset conversion
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/5/12
|
||||
*/
|
||||
public class BitSetUtils {
|
||||
|
||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final private byte NBITS_LONG_REPRESENTATION = 64; // the number of bits used in the long version to represent the bit set (necessary for the two's complement representation of negative numbers)
|
||||
static final private byte NBITS_SHORT_REPRESENTATION = 16; // the number of bits used in the short version to represent the bit set (necessary for the two's complement representation of negative numbers)
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
|
||||
/**
|
||||
* Creates an long out of a bitset
|
||||
*
|
||||
* @param bitSet the bitset
|
||||
* @return a long from the bitset representation
|
||||
*/
|
||||
public static long longFrom(final BitSet bitSet) {
|
||||
return longFrom(bitSet, NBITS_LONG_REPRESENTATION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a short integer from a bitset
|
||||
*
|
||||
* @param bitSet the bitset
|
||||
* @return a short from the bitset representation
|
||||
*/
|
||||
public static short shortFrom(final BitSet bitSet) {
|
||||
return (short) longFrom(bitSet, NBITS_SHORT_REPRESENTATION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cretes an integer with any number of bits (up to 64 -- long precision) from a bitset
|
||||
*
|
||||
* @param bitSet the bitset
|
||||
* @param nBits the number of bits to be used for this representation
|
||||
* @return an integer with nBits from the bitset representation
|
||||
*/
|
||||
public static long longFrom(final BitSet bitSet, final int nBits) {
|
||||
long number = 0;
|
||||
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0 && bitIndex <= nBits; bitIndex = bitSet.nextSetBit(bitIndex + 1))
|
||||
number |= 1L << bitIndex;
|
||||
|
||||
return number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given long
|
||||
*
|
||||
* @param number the number to turn into a bitset
|
||||
* @return a bitset representation of the long
|
||||
*/
|
||||
public static BitSet bitSetFrom(long number) {
|
||||
return bitSetFrom(number, NBITS_LONG_REPRESENTATION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given short
|
||||
*
|
||||
* @param number the number to turn into a bitset
|
||||
* @return a bitset representation of the short
|
||||
*/
|
||||
public static BitSet bitSetFrom(short number) {
|
||||
return bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of an arbitrary integer (number of bits capped at 64 -- long precision)
|
||||
*
|
||||
* @param number the number to turn into a bitset
|
||||
* @param nBits the number of bits to use as precision for this conversion
|
||||
* @return a bitset representation of the integer
|
||||
*/
|
||||
public static BitSet bitSetFrom(long number, int nBits) {
|
||||
BitSet bitSet = new BitSet();
|
||||
boolean isNegative = number < 0;
|
||||
int bitIndex = 0;
|
||||
while (number != 0) {
|
||||
if (number % 2 != 0)
|
||||
bitSet.set(bitIndex);
|
||||
bitIndex++;
|
||||
number /= 2;
|
||||
}
|
||||
if (isNegative) {
|
||||
boolean foundFirstSetBit = false;
|
||||
for (int i = bitSet.nextSetBit(0); i < nBits && i >= 0; i++) {
|
||||
boolean bit = bitSet.get(i);
|
||||
if (!foundFirstSetBit && bit)
|
||||
foundFirstSetBit = true; // maintain all bits until the first 1 is found (inclusive)
|
||||
else if (foundFirstSetBit)
|
||||
bitSet.flip(i); // flip every other bit up to NBITS_REPRESENTATION
|
||||
}
|
||||
}
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a BitSet into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param bitSet the bitset representation of the dna sequence
|
||||
* @return the dna sequence represented by the bitset
|
||||
*/
|
||||
public static String dnaFrom(final BitSet bitSet) {
|
||||
long number = longFrom(bitSet); // the base_10 representation of the bit set
|
||||
if (number < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
number -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
String dna = "";
|
||||
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
byte base = (byte) (number % 4);
|
||||
switch (base) {
|
||||
case 0:
|
||||
dna = "A" + dna;
|
||||
break;
|
||||
case 1:
|
||||
dna = "C" + dna;
|
||||
break;
|
||||
case 2:
|
||||
dna = "G" + dna;
|
||||
break;
|
||||
case 3:
|
||||
dna = "T" + dna;
|
||||
break;
|
||||
}
|
||||
number /= 4;
|
||||
}
|
||||
for (int j = dna.length(); j < length; j++)
|
||||
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @return the bitset representing the dna sequence
|
||||
*/
|
||||
public static BitSet bitSetFrom(String dna) {
|
||||
if (dna.length() > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length()));
|
||||
|
||||
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||
long preContext = combinationsFor(dna.length() - 1); // the sum of all combinations that preceded the length of the dna string
|
||||
for (int i = 0; i < dna.length(); i++) {
|
||||
baseTen *= 4;
|
||||
switch (dna.charAt(i)) {
|
||||
case 'A':
|
||||
baseTen += 0;
|
||||
break;
|
||||
case 'C':
|
||||
baseTen += 1;
|
||||
break;
|
||||
case 'G':
|
||||
baseTen += 2;
|
||||
break;
|
||||
case 'T':
|
||||
baseTen += 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the number of bits necessary to represent a given number of elements
|
||||
*
|
||||
* @param numberOfElements the number of elements to represent (must be positive)
|
||||
* @return the number of bits necessary to represent this many elements
|
||||
*/
|
||||
public static int numberOfBitsToRepresent(long numberOfElements) {
|
||||
if (numberOfElements < 0)
|
||||
throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
|
||||
|
||||
if (numberOfElements == 1L)
|
||||
return 1; // special case
|
||||
|
||||
int n = 0;
|
||||
numberOfElements--;
|
||||
while (numberOfElements > 0) {
|
||||
numberOfElements = numberOfElements >> 1;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the length of the DNA context for a given base 10 number
|
||||
*
|
||||
* It is important to know the length given the base 10 number to calculate the number of combinations
|
||||
* and to disambiguate the "quasi-canonical" state.
|
||||
*
|
||||
* This method also calculates the number of combinations as a by-product, but since it memoizes the
|
||||
* results, a subsequent call to combinationsFor(length) is O(1).
|
||||
*
|
||||
* @param number the base 10 representation of the bitset
|
||||
* @return the length of the DNA context represented by this number
|
||||
*/
|
||||
private static int contextLengthFor(long number) {
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
length++;
|
||||
combinations = combinationsFor(length); // calculate the next context
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||
*
|
||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||
*
|
||||
* @param length the length of the DNA context
|
||||
* @return the sum of all combinations leading up to this context length.
|
||||
*/
|
||||
private static long combinationsFor(int length) {
|
||||
if (length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));
|
||||
|
||||
// only calculate the number of combinations if the table hasn't already cached the value
|
||||
if (length > 0 && combinationsPerLength[length] == 0) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
}
|
||||
return combinationsPerLength[length];
|
||||
}
|
||||
|
||||
|
||||
public static byte[] sizeOf(Object obj) throws java.io.IOException
|
||||
{
|
||||
ByteArrayOutputStream byteObject = new ByteArrayOutputStream();
|
||||
ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteObject);
|
||||
objectOutputStream.writeObject(obj);
|
||||
objectOutputStream.flush();
|
||||
objectOutputStream.close();
|
||||
byteObject.close();
|
||||
|
||||
return byteObject.toByteArray();
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,6 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
|
@ -1527,124 +1526,4 @@ public class MathUtils {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an integer out of a bitset
|
||||
*
|
||||
* @param bitSet the bitset
|
||||
* @return an integer with the bitset representation
|
||||
*/
|
||||
public static long intFrom(final BitSet bitSet) {
|
||||
long number = 0;
|
||||
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
|
||||
number |= 1L << bitIndex;
|
||||
|
||||
return number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given integer
|
||||
*
|
||||
* @param number the number to turn into a bitset
|
||||
* @return a bitset representation of the integer
|
||||
*/
|
||||
public static BitSet bitSetFrom(long number) {
|
||||
BitSet bitSet = new BitSet();
|
||||
int bitIndex = 0;
|
||||
while (number > 0) {
|
||||
if (number%2 > 0)
|
||||
bitSet.set(bitIndex);
|
||||
bitIndex++;
|
||||
number /= 2;
|
||||
}
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a BitSet into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param bitSet the bitset representation of the dna sequence
|
||||
* @return the dna sequence represented by the bitset
|
||||
*/
|
||||
public static String dnaFrom(final BitSet bitSet) {
|
||||
long number = intFrom(bitSet); // the base_10 representation of the bit set
|
||||
long preContext = 0; // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later)
|
||||
long nextContext = 4; // the next context (we advance it so we know which one was preceding it).
|
||||
int i = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
while (nextContext <= number) { // find the length of the dna string (i)
|
||||
preContext = nextContext; // keep track of the number of combinations in the preceding context
|
||||
nextContext += Math.pow(4, ++i);// calculate the next context
|
||||
}
|
||||
number -= preContext; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
String dna = "";
|
||||
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
byte base = (byte) (number % 4);
|
||||
switch (base) {
|
||||
case 0 : dna = "A" + dna; break;
|
||||
case 1 : dna = "C" + dna; break;
|
||||
case 2 : dna = "G" + dna; break;
|
||||
case 3 : dna = "T" + dna; break;
|
||||
}
|
||||
number /= 4;
|
||||
}
|
||||
for (int j = dna.length(); j < i; j++)
|
||||
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @return the bitset representing the dna sequence
|
||||
*/
|
||||
public static BitSet bitSetFrom(String dna) {
|
||||
if (dna.length() > 31)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length()));
|
||||
|
||||
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||
long preContext = 0; // the sum of all combinations that preceded the length of the dna string
|
||||
for (int i=0; i<dna.length(); i++) {
|
||||
baseTen *= 4;
|
||||
switch(dna.charAt(i)) {
|
||||
case 'A': baseTen += 0; break;
|
||||
case 'C': baseTen += 1; break;
|
||||
case 'G': baseTen += 2; break;
|
||||
case 'T': baseTen += 3; break;
|
||||
}
|
||||
if (i>0)
|
||||
preContext += Math.pow(4, i); // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...)
|
||||
}
|
||||
|
||||
return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import java.util.regex.Pattern;
|
|||
|
||||
/**
|
||||
* Utility methods to facilitate on-the-fly base quality score recalibration.
|
||||
*
|
||||
*
|
||||
* User: rpoplin
|
||||
* Date: 2/4/12
|
||||
*/
|
||||
|
|
@ -58,7 +58,7 @@ public class BaseRecalibration {
|
|||
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
|
||||
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values.
|
||||
|
||||
public BaseRecalibration( final File RECAL_FILE ) {
|
||||
public BaseRecalibration(final File RECAL_FILE) {
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||
|
||||
|
|
@ -68,27 +68,29 @@ public class BaseRecalibration {
|
|||
// Read in the data from the csv file and populate the data map and covariates list
|
||||
boolean sawEOF = false;
|
||||
try {
|
||||
for ( String line : new XReadLines(RECAL_FILE) ) {
|
||||
for (String line : new XReadLines(RECAL_FILE)) {
|
||||
lineNumber++;
|
||||
if ( EOF_MARKER.equals(line) ) {
|
||||
if (EOF_MARKER.equals(line)) {
|
||||
sawEOF = true;
|
||||
} else if( COMMENT_PATTERN.matcher(line).matches() ) {
|
||||
}
|
||||
else if (COMMENT_PATTERN.matcher(line).matches()) {
|
||||
; // Skip over the comment lines, (which start with '#')
|
||||
}
|
||||
// Read in the covariates that were used from the input file
|
||||
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
|
||||
if( foundAllCovariates ) {
|
||||
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
|
||||
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
||||
else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
|
||||
if (foundAllCovariates) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
|
||||
}
|
||||
else { // Found the covariate list in input file, loop through all of them and instantiate them
|
||||
String[] vals = line.split(",");
|
||||
for( int iii = 0; iii < vals.length - 4; iii++ ) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
|
||||
for (int iii = 0; iii < vals.length - 4; iii++) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
|
||||
boolean foundClass = false;
|
||||
for( Class<?> covClass : classes ) {
|
||||
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
||||
for (Class<?> covClass : classes) {
|
||||
if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
|
||||
foundClass = true;
|
||||
try {
|
||||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
requestedCovariates.add( covariate );
|
||||
Covariate covariate = (Covariate) covClass.newInstance();
|
||||
requestedCovariates.add(covariate);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(covClass, e);
|
||||
}
|
||||
|
|
@ -96,63 +98,65 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
if( !foundClass ) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
|
||||
if (!foundClass) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else { // Found a line of data
|
||||
if( !foundAllCovariates ) {
|
||||
}
|
||||
else { // Found a line of data
|
||||
if (!foundAllCovariates) {
|
||||
foundAllCovariates = true;
|
||||
|
||||
// At this point all the covariates should have been found and initialized
|
||||
if( requestedCovariates.size() < 2 ) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
|
||||
if (requestedCovariates.size() < 2) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
|
||||
}
|
||||
|
||||
final boolean createCollapsedTables = true;
|
||||
|
||||
// Initialize any covariate member variables using the shared argument collection
|
||||
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
for( Covariate cov : requestedCovariates ) {
|
||||
cov.initialize( RAC );
|
||||
for (Covariate cov : requestedCovariates) {
|
||||
cov.initialize(RAC);
|
||||
}
|
||||
// Initialize the data hashMaps
|
||||
dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
|
||||
dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size());
|
||||
|
||||
}
|
||||
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
|
||||
}
|
||||
}
|
||||
|
||||
} catch ( FileNotFoundException e ) {
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
|
||||
} catch ( NumberFormatException e ) {
|
||||
} catch (NumberFormatException e) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
|
||||
}
|
||||
|
||||
if ( !sawEOF ) {
|
||||
if (!sawEOF) {
|
||||
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
|
||||
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
|
||||
}
|
||||
|
||||
if( dataManager == null ) {
|
||||
if (dataManager == null) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
|
||||
}
|
||||
|
||||
dataManager.generateEmpiricalQualities( 1, MAX_QUALITY_SCORE );
|
||||
dataManager.generateEmpiricalQualities(1, MAX_QUALITY_SCORE);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
|
||||
*
|
||||
* @param line A line of CSV data read from the recalibration table data file
|
||||
*/
|
||||
private void addCSVData(final File file, final String line) {
|
||||
final String[] vals = line.split(",");
|
||||
|
||||
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
||||
if( vals.length != requestedCovariates.size() + 4 ) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
|
||||
if (vals.length != requestedCovariates.size() + 4) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
|
||||
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
||||
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
||||
}
|
||||
|
|
@ -160,48 +164,48 @@ public class BaseRecalibration {
|
|||
final Object[] key = new Object[requestedCovariates.size()];
|
||||
Covariate cov;
|
||||
int iii;
|
||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||
cov = requestedCovariates.get( iii );
|
||||
key[iii] = cov.getValue( vals[iii] );
|
||||
for (iii = 0; iii < requestedCovariates.size(); iii++) {
|
||||
cov = requestedCovariates.get(iii);
|
||||
key[iii] = cov.getValue(vals[iii]);
|
||||
}
|
||||
final String modelString = vals[iii++];
|
||||
final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.getErrorModelFromString(modelString);
|
||||
final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.errorModelFrom(modelString);
|
||||
|
||||
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
||||
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
||||
final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
|
||||
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
||||
|
||||
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
||||
|
||||
dataManager.addToAllTables(key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
||||
}
|
||||
|
||||
public void recalibrateRead( final GATKSAMRecord read ) {
|
||||
|
||||
public void recalibrateRead(final GATKSAMRecord read) {
|
||||
|
||||
//compute all covariate values for this read
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
|
||||
final CovariateKeySet covariateKeySet = RecalDataManager.covariateKeySetFrom(read);
|
||||
|
||||
for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
|
||||
final byte[] originalQuals = read.getBaseQualities( errorModel );
|
||||
for (final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values()) {
|
||||
final byte[] originalQuals = read.getBaseQualities(errorModel);
|
||||
final byte[] recalQuals = originalQuals.clone();
|
||||
|
||||
// For each base in the read
|
||||
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
||||
|
||||
for (int offset = 0; offset < read.getReadLength(); offset++) {
|
||||
|
||||
final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
|
||||
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
|
||||
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length - 1); // need to strip off the error mode which was appended to the list of covariates
|
||||
|
||||
// BUGBUG: This caching seems to put the entire key set into memory which negates the benefits of storing the delta delta tables?
|
||||
//Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
|
||||
//if( qualityScore == null ) {
|
||||
final byte qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
|
||||
final byte qualityScore = performSequentialQualityCalculation(errorModel, fullCovariateKey);
|
||||
// qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
|
||||
//}
|
||||
|
||||
|
||||
recalQuals[offset] = qualityScore;
|
||||
}
|
||||
|
||||
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
||||
read.setBaseQualities( recalQuals, errorModel );
|
||||
|
||||
preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low
|
||||
read.setBaseQualities(recalQuals, errorModel);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -211,27 +215,28 @@ public class BaseRecalibration {
|
|||
*
|
||||
* Given the full recalibration table, we perform the following preprocessing steps:
|
||||
*
|
||||
* - calculate the global quality score shift across all data [DeltaQ]
|
||||
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
||||
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
||||
* - The final shift equation is:
|
||||
* - calculate the global quality score shift across all data [DeltaQ]
|
||||
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
||||
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
||||
* - The final shift equation is:
|
||||
*
|
||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
||||
*
|
||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
||||
* @param key The list of Comparables that were calculated from the covariates
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
private byte performSequentialQualityCalculation( final RecalDataManager.BaseRecalibrationType errorModel, final Object... key ) {
|
||||
private byte performSequentialQualityCalculation(final RecalDataManager.BaseRecalibrationType errorModel, final Object... key) {
|
||||
|
||||
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
|
||||
final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
|
||||
final Object[] readGroupCollapsedKey = new Object[1];
|
||||
final Object[] qualityScoreCollapsedKey = new Object[2];
|
||||
final Object[] covariateCollapsedKey = new Object[3];
|
||||
|
||||
// The global quality shift (over the read group only)
|
||||
readGroupCollapsedKey[0] = key[0];
|
||||
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0, errorModel).get( readGroupCollapsedKey ));
|
||||
final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0, errorModel).get(readGroupCollapsedKey));
|
||||
double globalDeltaQ = 0.0;
|
||||
if( globalRecalDatum != null ) {
|
||||
if (globalRecalDatum != null) {
|
||||
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
||||
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
|
||||
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
|
||||
|
|
@ -240,9 +245,9 @@ public class BaseRecalibration {
|
|||
// The shift in quality between reported and empirical
|
||||
qualityScoreCollapsedKey[0] = key[0];
|
||||
qualityScoreCollapsedKey[1] = key[1];
|
||||
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1, errorModel).get( qualityScoreCollapsedKey ));
|
||||
final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1, errorModel).get(qualityScoreCollapsedKey));
|
||||
double deltaQReported = 0.0;
|
||||
if( qReportedRecalDatum != null ) {
|
||||
if (qReportedRecalDatum != null) {
|
||||
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
}
|
||||
|
|
@ -252,27 +257,28 @@ public class BaseRecalibration {
|
|||
double deltaQCovariateEmpirical;
|
||||
covariateCollapsedKey[0] = key[0];
|
||||
covariateCollapsedKey[1] = key[1];
|
||||
for( int iii = 2; iii < key.length; iii++ ) {
|
||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
||||
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii, errorModel).get( covariateCollapsedKey ));
|
||||
if( covariateRecalDatum != null ) {
|
||||
for (int iii = 2; iii < key.length; iii++) {
|
||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
||||
final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii, errorModel).get(covariateCollapsedKey));
|
||||
if (covariateRecalDatum != null) {
|
||||
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||
}
|
||||
}
|
||||
|
||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
|
||||
return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
|
||||
*
|
||||
* @param originalQuals The list of original base quality scores
|
||||
* @param recalQuals A list of the new recalibrated quality scores
|
||||
* @param recalQuals A list of the new recalibrated quality scores
|
||||
*/
|
||||
private void preserveQScores( final byte[] originalQuals, final byte[] recalQuals ) {
|
||||
for( int iii = 0; iii < recalQuals.length; iii++ ) {
|
||||
if( originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
||||
private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) {
|
||||
for (int iii = 0; iii < recalQuals.length; iii++) {
|
||||
if (originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
||||
recalQuals[iii] = originalQuals[iii];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -213,8 +213,8 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) );
|
||||
if( quals == null ) {
|
||||
quals = new byte[getBaseQualities().length];
|
||||
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||
setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
|
||||
}
|
||||
return quals;
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -495,7 +496,7 @@ public class ReadUtils {
|
|||
/**
|
||||
* Is a base inside a read?
|
||||
*
|
||||
* @param read the read to evaluate
|
||||
* @param read the read to evaluate
|
||||
* @param referenceCoordinate the reference coordinate of the base to test
|
||||
* @return true if it is inside the read, false otherwise.
|
||||
*/
|
||||
|
|
@ -541,9 +542,9 @@ public class ReadUtils {
|
|||
*
|
||||
* See getCoverageDistributionOfRead for information on how the coverage is calculated.
|
||||
*
|
||||
* @param list the list of reads covering the region
|
||||
* @param list the list of reads covering the region
|
||||
* @param startLocation the first reference coordinate of the region (inclusive)
|
||||
* @param stopLocation the last reference coordinate of the region (inclusive)
|
||||
* @param stopLocation the last reference coordinate of the region (inclusive)
|
||||
* @return an array with the coverage of each position from startLocation to stopLocation
|
||||
*/
|
||||
public static int [] getCoverageDistributionOfReads(List<GATKSAMRecord> list, int startLocation, int stopLocation) {
|
||||
|
|
@ -563,9 +564,9 @@ public class ReadUtils {
|
|||
* Note: This function counts DELETIONS as coverage (since the main purpose is to downsample
|
||||
* reads for variant regions, and deletions count as variants)
|
||||
*
|
||||
* @param read the read to get the coverage distribution of
|
||||
* @param read the read to get the coverage distribution of
|
||||
* @param startLocation the first reference coordinate of the region (inclusive)
|
||||
* @param stopLocation the last reference coordinate of the region (inclusive)
|
||||
* @param stopLocation the last reference coordinate of the region (inclusive)
|
||||
* @return an array with the coverage of each position from startLocation to stopLocation
|
||||
*/
|
||||
public static int [] getCoverageDistributionOfRead(GATKSAMRecord read, int startLocation, int stopLocation) {
|
||||
|
|
@ -611,9 +612,9 @@ public class ReadUtils {
|
|||
* Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with value==true meaning it contributes to the coverage.
|
||||
* Example: Read => {true, true, false, ... false}
|
||||
*
|
||||
* @param readList the list of reads to generate the association mappings
|
||||
* @param readList the list of reads to generate the association mappings
|
||||
* @param startLocation the first reference coordinate of the region (inclusive)
|
||||
* @param stopLocation the last reference coordinate of the region (inclusive)
|
||||
* @param stopLocation the last reference coordinate of the region (inclusive)
|
||||
* @return the two hashmaps described above
|
||||
*/
|
||||
public static Pair<HashMap<Integer, HashSet<GATKSAMRecord>> , HashMap<GATKSAMRecord, Boolean[]>> getBothReadToLociMappings (List<GATKSAMRecord> readList, int startLocation, int stopLocation) {
|
||||
|
|
@ -622,7 +623,6 @@ public class ReadUtils {
|
|||
HashMap<Integer, HashSet<GATKSAMRecord>> locusToReadMap = new HashMap<Integer, HashSet<GATKSAMRecord>>(2*(stopLocation - startLocation + 1), 0.5f);
|
||||
HashMap<GATKSAMRecord, Boolean[]> readToLocusMap = new HashMap<GATKSAMRecord, Boolean[]>(2*readList.size(), 0.5f);
|
||||
|
||||
|
||||
for (int i = startLocation; i <= stopLocation; i++)
|
||||
locusToReadMap.put(i, new HashSet<GATKSAMRecord>()); // Initialize the locusToRead map with empty lists
|
||||
|
||||
|
|
@ -631,7 +631,7 @@ public class ReadUtils {
|
|||
|
||||
int [] readCoverage = getCoverageDistributionOfRead(read, startLocation, stopLocation);
|
||||
|
||||
for (int i=0; i<readCoverage.length; i++) {
|
||||
for (int i = 0; i < readCoverage.length; i++) {
|
||||
int refLocation = i + startLocation;
|
||||
if (readCoverage[i] > 0) {
|
||||
// Update the hash for this locus
|
||||
|
|
@ -649,6 +649,55 @@ public class ReadUtils {
|
|||
return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create random read qualities
|
||||
*
|
||||
* @param length the length of the read
|
||||
* @return an array with randomized base qualities between 0 and 50
|
||||
*/
|
||||
public static byte[] createRandomReadQuals(int length) {
|
||||
Random random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
byte[] quals = new byte[length];
|
||||
for (int i = 0; i < length; i++)
|
||||
quals[i] = (byte) random.nextInt(50);
|
||||
return quals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create random read qualities
|
||||
*
|
||||
* @param length the length of the read
|
||||
* @param allowNs whether or not to allow N's in the read
|
||||
* @return an array with randomized bases (A-N) with equal probability
|
||||
*/
|
||||
public static byte[] createRandomReadBases(int length, boolean allowNs) {
|
||||
Random random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
int numberOfBases = allowNs ? 5 : 4;
|
||||
byte[] bases = new byte[length];
|
||||
for (int i = 0; i < length; i++) {
|
||||
switch (random.nextInt(numberOfBases)) {
|
||||
case 0:
|
||||
bases[i] = 'A';
|
||||
break;
|
||||
case 1:
|
||||
bases[i] = 'C';
|
||||
break;
|
||||
case 2:
|
||||
bases[i] = 'G';
|
||||
break;
|
||||
case 3:
|
||||
bases[i] = 'T';
|
||||
break;
|
||||
case 4:
|
||||
bases[i] = 'N';
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Something went wrong, this is just impossible");
|
||||
}
|
||||
}
|
||||
return bases;
|
||||
}
|
||||
|
||||
public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
|
||||
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
|
||||
int sequenceRecordIndex = 0;
|
||||
|
|
@ -656,4 +705,5 @@ public class ReadUtils {
|
|||
sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
|
||||
return Arrays.deepToString(sequenceRecordNames);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -12,37 +12,13 @@ import java.util.BitSet;
|
|||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* <p>
|
||||
* [Long description of the walker]
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Description of the Input]
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Description of the Output]
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T [walker name]
|
||||
* </pre>
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/1/12
|
||||
*/
|
||||
public class ContextCovariateUnitTest {
|
||||
ContextCovariate covariate;
|
||||
RecalibrationArgumentCollection RAC;
|
||||
Random random;
|
||||
Random random;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
|
|
@ -55,49 +31,35 @@ public class ContextCovariateUnitTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testSimpleContexts() {
|
||||
byte [] quals = createRandomReadQuals(101);
|
||||
byte [] bbases = createRandomReadBases(101);
|
||||
byte[] quals = ReadUtils.createRandomReadQuals(10000);
|
||||
byte[] bbases = ReadUtils.createRandomReadBases(10000, true);
|
||||
String bases = stringFrom(bbases);
|
||||
// System.out.println("Read: " + bases);
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
|
||||
CovariateValues values = covariate.getValues(read);
|
||||
verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
|
||||
verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
|
||||
verifyCovariateArray((BitSet []) values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
|
||||
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
|
||||
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
|
||||
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
|
||||
}
|
||||
|
||||
|
||||
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
|
||||
for (int i=0; i<values.length; i++) {
|
||||
if (i >= contextSize)
|
||||
Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i));
|
||||
else
|
||||
Assert.assertNull(values[i]);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
String expectedContext = covariate.NO_CONTEXT_VALUE;
|
||||
if (i >= contextSize) {
|
||||
String context = bases.substring(i - contextSize, i);
|
||||
if (!context.contains("N"))
|
||||
expectedContext = context;
|
||||
}
|
||||
// System.out.println(String.format("Context [%d]:\n%s\n%s\n", i, covariate.keyFromBitSet(values[i]), expectedContext));
|
||||
Assert.assertEquals(covariate.keyFromBitSet(values[i]), expectedContext);
|
||||
}
|
||||
}
|
||||
|
||||
private String stringFrom(byte [] array) {
|
||||
private String stringFrom(byte[] array) {
|
||||
String s = "";
|
||||
for (byte value : array)
|
||||
s += (char) value;
|
||||
return s;
|
||||
}
|
||||
|
||||
private byte [] createRandomReadQuals(int length) {
|
||||
byte [] quals = new byte[length];
|
||||
for (int i=0; i<length; i++)
|
||||
quals[i] = (byte) random.nextInt(50);
|
||||
return quals;
|
||||
}
|
||||
|
||||
private byte [] createRandomReadBases(int length) {
|
||||
byte [] bases = new byte[length];
|
||||
for (int i=0; i<length; i++) {
|
||||
switch(random.nextInt(4)) {
|
||||
case 0: bases[i] = 'A'; break;
|
||||
case 1: bases[i] = 'C'; break;
|
||||
case 2: bases[i] = 'G'; break;
|
||||
case 3: bases[i] = 'T'; break;
|
||||
}
|
||||
}
|
||||
return bases;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,68 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/1/12
|
||||
*/
|
||||
public class CycleCovariateUnitTest {
|
||||
CycleCovariate covariate;
|
||||
RecalibrationArgumentCollection RAC;
|
||||
Random random;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
RAC = new RecalibrationArgumentCollection();
|
||||
covariate = new CycleCovariate();
|
||||
random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
covariate.initialize(RAC);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSimpleCycles() {
|
||||
short readLength = 10;
|
||||
byte[] quals = ReadUtils.createRandomReadQuals(readLength);
|
||||
byte[] bbases = ReadUtils.createRandomReadBases(readLength, true);
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
|
||||
read.getReadGroup().setPlatform("illumina");
|
||||
|
||||
CovariateValues values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), (short) 1, (short) 1);
|
||||
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), readLength, (short) -1);
|
||||
|
||||
read.setReadPairedFlag(true);
|
||||
read.setSecondOfPairFlag(true);
|
||||
values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), (short) -readLength, (short) 1);
|
||||
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1);
|
||||
|
||||
}
|
||||
|
||||
private void verifyCovariateArray(BitSet[] values, short init, short increment) {
|
||||
for (short i = 0; i < values.length; i++) {
|
||||
short actual = Short.decode(covariate.keyFromBitSet(values[i]));
|
||||
int expected = init + (increment * i);
|
||||
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
|
||||
Assert.assertEquals(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/5/12
|
||||
*/
|
||||
|
||||
public class BitSetUtilsUnitTest {
|
||||
private static int RANDOM_NUMBERS_TO_TRY = 87380;
|
||||
private static Random random;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLongBitSet() {
|
||||
long[] numbers = {0L, 1L, 428L, 65536L, 239847L, 4611686018427387903L, Long.MAX_VALUE, Long.MIN_VALUE, -1L, -2L, -7L, -128L, -65536L, -100000L};
|
||||
for (long n : numbers)
|
||||
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n);
|
||||
|
||||
for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
|
||||
long n = random.nextLong();
|
||||
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n); // Because class Random uses a seed with only 48 bits, this algorithm will not return all possible long values.
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testShortBitSet() {
|
||||
short[] numbers = {0, 1, 428, 25934, 23847, 16168, Short.MAX_VALUE, Short.MIN_VALUE, -1, -2, -7, -128, -12312, -31432};
|
||||
for (long n : numbers)
|
||||
Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
|
||||
|
||||
for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
|
||||
short n = (short) random.nextInt();
|
||||
Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testDNAAndBitSetConversion() {
|
||||
String[] dna = {"AGGTGTTGT", "CCCCCCCCCCCCCC", "GGGGGGGGGGGGGG", "TTTTTTTTTTTTTT", "GTAGACCGATCTCAGCTAGT", "AACGTCAATGCAGTCAAGTCAGACGTGGGTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"};
|
||||
|
||||
// Test all contexts of size 1-8.
|
||||
for (long n = 0; n < RANDOM_NUMBERS_TO_TRY; n++)
|
||||
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(n)))), n);
|
||||
|
||||
// Test the special cases listed in the dna array
|
||||
for (String d : dna)
|
||||
Assert.assertEquals(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(d)), d);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testNumberOfBitsToRepresent() {
|
||||
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(0), 0); // Make sure 0 elements need 0 bits to be represented
|
||||
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(1), 1); // Make sure 1 element needs 1 bit to be represented
|
||||
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(3), 2); // Make sure 3 elements need 2 bit to be represented
|
||||
|
||||
for (int i = 1; i < 63; i++) { // Can't test i == 63 because n1 is a negative number
|
||||
long n1 = 1L << i;
|
||||
long n2 = Math.abs(random.nextLong()) % n1;
|
||||
long n3 = n1 | n2;
|
||||
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n3), (n3 == n1) ? i : i + 1);
|
||||
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n1), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -131,7 +130,8 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
|
||||
MathUtils.RunningAverage r = new MathUtils.RunningAverage();
|
||||
|
||||
for (int i = 0; i < numbers.length; i++) r.add((double) numbers[i]);
|
||||
for (int i = 0; i < numbers.length; i++)
|
||||
r.add((double) numbers[i]);
|
||||
|
||||
Assert.assertEquals((long) numbers.length, r.observationCount());
|
||||
Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
|
||||
|
|
@ -223,35 +223,6 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
return set.isEmpty();
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testIntAndBitSetConversion() {
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)), 239847);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)), 12726);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)), 0);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)), 1);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)), 65536);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testDNAAndBitSetConversion() {
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")), "ACGT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")), "AGGTGTTGT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")), "A");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")), "C");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")), "G");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")), "T");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")), "CC");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")), "AA");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")), "AAAA");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")), "CCCCCCCCCCCCCC");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")), "GGGGGGGGGGGGGG");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")), "TTTTTTTTTTTTTT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")), "GTAGACCGATCTCAGCTAGT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31)
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApproximateLog10SumLog10() {
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
|
|
@ -267,54 +238,54 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
|
||||
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
|
||||
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
|
||||
}
|
||||
|
||||
/**
 * Checks MathUtils.normalizeFromLog10 against hand-computed expected arrays, using
 * compareDoubleArrays for the element-wise comparison.
 *
 * First group: the three-argument overload is called with (false, true). The expected
 * arrays are the inputs shifted so that the largest entry becomes 0.0, i.e. the result
 * stays in log10 space.
 *
 * Second group: the single-argument overload is expected to return real-space values
 * that sum to 1 (e.g. four equal log10 inputs give 0.25 each).
 *
 * NOTE(review): every assertion appears twice, differing only in the whitespace after
 * "double[]". This looks like the old and new sides of a formatting-only diff shown
 * back to back; confirm which copy is live before relying on this listing.
 */
@Test
|
||||
public void testNormalizeFromLog10() {
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[] {0.0, 0.0, -1.0, -1.1, -7.8}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[] {0.0, 0.0, 0.0, -0.1, -6.8}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[] {-8.9, -6.7, -9.4, 0.0, -8.9}));
|
||||
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -1.0, -1.0, -1.0}), new double[] {0.25, 0.25, 0.25, 0.25}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -1.0}), new double[] {0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Private function used by testNormalizeFromLog10()
|
||||
*/
|
||||
private boolean compareDoubleArrays(double[] b1, double[] b2) {
|
||||
if( b1.length != b2.length ) {
|
||||
if (b1.length != b2.length) {
|
||||
return false; // sanity check
|
||||
}
|
||||
|
||||
for( int i=0; i < b1.length; i++ ){
|
||||
if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 )
|
||||
for (int i = 0; i < b1.length; i++) {
|
||||
if (MathUtils.compareDoubles(b1[i], b2[i]) != 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
|||
Loading…
Reference in New Issue