No more extraneous array creation in BQSR covariate classes; now covariates push their data directly to the ReadCovariates class as it's calculated (no more going through CovariateValues.java)
This commit is contained in:
parent
0398ae9695
commit
4895fe2289
|
|
@ -32,8 +32,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@ -54,20 +52,21 @@ public class ContextCovariate implements StandardCovariate {
|
|||
mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
|
||||
insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE;
|
||||
deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE;
|
||||
if (mismatchesContextSize > MAX_DNA_CONTEXT)
|
||||
throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize));
|
||||
if (insertionsContextSize > MAX_DNA_CONTEXT)
|
||||
throw new UserException.BadArgumentValue("insertions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, insertionsContextSize));
|
||||
if (deletionsContextSize > MAX_DNA_CONTEXT)
|
||||
throw new UserException.BadArgumentValue("deletions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, deletionsContextSize));
|
||||
|
||||
LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL;
|
||||
|
||||
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
||||
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
final int l = read.getReadLength();
|
||||
final long[] mismatches = new long[l];
|
||||
final long[] insertions = new long[l];
|
||||
final long[] deletions = new long[l];
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
|
||||
|
|
@ -76,18 +75,10 @@ public class ContextCovariate implements StandardCovariate {
|
|||
if (negativeStrand)
|
||||
bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
for (int i = 0; i < clippedRead.getReadLength(); i++) {
|
||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
|
||||
insertions[i] = contextWith(bases, i, insertionsContextSize);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
||||
final int readLength = clippedRead.getReadLength();
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
values.addCovariate(contextWith(bases, i, mismatchesContextSize), contextWith(bases, i, insertionsContextSize), contextWith(bases, i, deletionsContextSize), (negativeStrand ? readLength - i - 1 : i));
|
||||
}
|
||||
|
||||
if (negativeStrand) {
|
||||
reverse(mismatches);
|
||||
reverse(insertions);
|
||||
reverse(deletions);
|
||||
}
|
||||
return new CovariateValues(mismatches, insertions, deletions);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
@ -106,7 +97,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
@Override
|
||||
public long longFromKey(Object key) {
|
||||
return longFrom((String) key);
|
||||
return keyFromContext((String) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -123,35 +114,17 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @return the key representing the context
|
||||
*/
|
||||
private long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||
long result = -1;
|
||||
final int start = offset - contextSize + 1;
|
||||
if (start >= 0) {
|
||||
final byte[] context = Arrays.copyOfRange(bases, start, offset + 1);
|
||||
if (!BaseUtils.containsBase(context, BaseUtils.N))
|
||||
result = keyFromContext(context);
|
||||
}
|
||||
final long result;
|
||||
if (start >= 0)
|
||||
result = keyFromContext(bases, start, offset + 1);
|
||||
else
|
||||
result = -1L;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverses the given array in place.
|
||||
*
|
||||
* @param array any array
|
||||
*/
|
||||
private static void reverse(final long[] array) {
|
||||
final int arrayLength = array.length;
|
||||
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||
final long temp = array[l];
|
||||
array[l] = array[r];
|
||||
array[r] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
|
||||
public static long longFrom(final String dna) {
|
||||
return keyFromContext(dna.getBytes());
|
||||
public static long keyFromContext(final String dna) {
|
||||
return keyFromContext(dna.getBytes(), 0, dna.length());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -172,39 +145,38 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param dna the dna sequence
|
||||
* @return the key representing the dna sequence
|
||||
*/
|
||||
public static long keyFromContext(final byte[] dna) {
|
||||
if (dna.length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
|
||||
|
||||
final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
||||
for (final byte base : dna) {
|
||||
baseTen = baseTen << 2; // multiply by 4
|
||||
baseTen += (long)BaseUtils.simpleBaseToBaseIndex(base);
|
||||
public static long keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
final long preContext = combinationsPerLength[end - start - 1]; // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
||||
for (int i = start; i < end; i++) {
|
||||
baseTen = (baseTen << 2); // multiply by 4
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
return -1L;
|
||||
baseTen += (long)baseIndex;
|
||||
}
|
||||
return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
static {
|
||||
for (int i = 0; i < MAX_DNA_CONTEXT + 1; i++)
|
||||
computeCombinationsFor(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||
*
|
||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||
*
|
||||
* @param length the length of the DNA context
|
||||
* @return the sum of all combinations leading up to this context length.
|
||||
*/
|
||||
private static long combinationsFor(int length) {
|
||||
if (length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));
|
||||
|
||||
// only calculate the number of combinations if the table hasn't already cached the value
|
||||
if (length > 0 && combinationsPerLength[length] == 0) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
}
|
||||
return combinationsPerLength[length];
|
||||
private static void computeCombinationsFor(final int length) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -232,7 +204,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
key -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
key -= combinationsPerLength[length - 1]; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
|
|
@ -259,11 +231,11 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @return the length of the DNA context represented by this number
|
||||
*/
|
||||
private static int contextLengthFor(final long number) {
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsPerLength[length]; // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
length++;
|
||||
combinations = combinationsFor(length); // calculate the next context
|
||||
combinations = combinationsPerLength[length]; // calculate the next context
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,10 +48,10 @@ public interface Covariate {
|
|||
/**
|
||||
* Calculates covariate values for all positions in the read.
|
||||
*
|
||||
* @param read the read to calculate the covariates on.
|
||||
* @return all the covariate values for every base in the read.
|
||||
* @param read the read to calculate the covariates on.
|
||||
* @param values the object to record the covariate values for every base in the read.
|
||||
*/
|
||||
public CovariateValues getValues(final GATKSAMRecord read);
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values);
|
||||
|
||||
/**
|
||||
* Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
|
|||
|
|
@ -1,37 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
/**
|
||||
* An object to hold the different covariate values for all bases in the read.
|
||||
*
|
||||
* Currently we have three different covariates for each read:
|
||||
* - Mismatch
|
||||
* - Insertion
|
||||
* - Deletion
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
|
||||
private final long[] mismatches;
|
||||
private final long[] insertions;
|
||||
private final long[] deletions;
|
||||
|
||||
public CovariateValues(final long[] mismatch, final long[] insertion, final long[] deletion) {
|
||||
this.mismatches = mismatch;
|
||||
this.insertions = insertion;
|
||||
this.deletions = deletion;
|
||||
}
|
||||
|
||||
public long[] getMismatches() {
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
public long[] getInsertions() {
|
||||
return insertions;
|
||||
}
|
||||
|
||||
public long[] getDeletions() {
|
||||
return deletions;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -58,9 +58,8 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final int readLength = read.getReadLength();
|
||||
final long[] cycles = new long[readLength];
|
||||
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
||||
|
||||
// Discrete cycle platforms
|
||||
|
|
@ -79,12 +78,11 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
final int CUSHION = 4;
|
||||
final int MAX_CYCLE = readLength - CUSHION - 1;
|
||||
for (int i = 0; i < MAX_CYCLE; i++) {
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
final long key = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, i);
|
||||
cycle += increment;
|
||||
}
|
||||
for (int i = MAX_CYCLE; i < readLength; i++)
|
||||
cycles[i] = -1L;
|
||||
}
|
||||
|
||||
// Flow cycle platforms
|
||||
|
|
@ -108,19 +106,23 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = 0;
|
||||
while (iii < readLength) {
|
||||
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
if (iii < readLength) {
|
||||
|
|
@ -130,7 +132,8 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
|
||||
|
|
@ -140,19 +143,23 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = readLength - 1;
|
||||
while (iii >= 0) {
|
||||
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
if (iii >= 0) {
|
||||
|
|
@ -162,7 +169,8 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
}
|
||||
|
|
@ -173,8 +181,6 @@ public class CycleCovariate implements StandardCovariate {
|
|||
else {
|
||||
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
||||
}
|
||||
|
||||
return new CovariateValues(cycles, cycles, cycles);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
|
|||
|
|
@ -40,28 +40,17 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
}
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int readLength = read.getReadLength();
|
||||
|
||||
long[] mismatches = new long[readLength];
|
||||
long[] insertions = new long[readLength];
|
||||
long[] deletions = new long[readLength];
|
||||
|
||||
byte[] baseQualities = read.getBaseQualities();
|
||||
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final byte[] baseQualities = read.getBaseQualities();
|
||||
final byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
final byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
|
||||
for (int i = 0; i < baseQualities.length; i++) {
|
||||
mismatches[i] = (long)baseQualities[i];
|
||||
insertions[i] = (long)baseInsertionQualities[i];
|
||||
deletions[i] = (long)baseDeletionQualities[i];
|
||||
values.addCovariate((long)baseQualities[i], (long)baseInsertionQualities[i], (long)baseDeletionQualities[i], i);
|
||||
}
|
||||
|
||||
return new CovariateValues(mismatches, insertions, deletions);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
|
|||
|
|
@ -15,24 +15,22 @@ public class ReadCovariates {
|
|||
private final long[][] insertionsKeySet;
|
||||
private final long[][] deletionsKeySet;
|
||||
|
||||
private int nextCovariateIndex;
|
||||
private int currentCovariateIndex = 0;
|
||||
|
||||
public ReadCovariates(int readLength, int numberOfCovariates) {
|
||||
this.mismatchesKeySet = new long[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new long[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new long[readLength][numberOfCovariates];
|
||||
this.nextCovariateIndex = 0;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
nextCovariateIndex = 0;
|
||||
public void setCovariateIndex(final int index) {
|
||||
currentCovariateIndex = index;
|
||||
}
|
||||
|
||||
public void addCovariate(CovariateValues covariate) {
|
||||
transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
|
||||
transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
|
||||
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
|
||||
nextCovariateIndex++;
|
||||
public void addCovariate(final long mismatch, final long insertion, final long deletion, final int readOffset) {
|
||||
mismatchesKeySet[readOffset][currentCovariateIndex] = mismatch;
|
||||
insertionsKeySet[readOffset][currentCovariateIndex] = insertion;
|
||||
deletionsKeySet[readOffset][currentCovariateIndex] = deletion;
|
||||
}
|
||||
|
||||
public long[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
|
|
@ -60,11 +58,6 @@ public class ReadCovariates {
|
|||
return deletionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
private void transposeCovariateValues(final long[][] keySet, final long[] covariateValues) {
|
||||
for (int i = 0; i < covariateValues.length; i++)
|
||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing routines
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
||||
/*
|
||||
|
|
@ -50,13 +49,13 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
final int l = read.getReadLength();
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
||||
final long key = keyForReadGroup(readGroupId);
|
||||
long[] readGroups = new long[l];
|
||||
Arrays.fill(readGroups, key);
|
||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||
|
||||
final int l = read.getReadLength();
|
||||
for (int i = 0; i < l; i++)
|
||||
values.addCovariate(key, key, key, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -631,8 +631,10 @@ public class RecalDataManager {
|
|||
*/
|
||||
public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) {
|
||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||
for (final Covariate covariate : requestedCovariates)
|
||||
readCovariates.addCovariate(covariate.getValues(read));
|
||||
for (int i = 0; i < requestedCovariates.length; i++) {
|
||||
readCovariates.setCovariateIndex(i);
|
||||
requestedCovariates[i].recordValues(read, readCovariates);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -92,7 +93,6 @@ public class BaseRecalibration {
|
|||
* @param read the read to recalibrate
|
||||
*/
|
||||
public void recalibrateRead(final GATKSAMRecord read) {
|
||||
readCovariates.reset();
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
final byte[] quals = read.getBaseQualities(errorModel);
|
||||
|
|
@ -181,7 +181,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
|
||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||
|
||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,16 +80,22 @@ public class BQSRKeyManagerUnitTest {
|
|||
}
|
||||
|
||||
private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
|
||||
final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][];
|
||||
int i = 0;
|
||||
for (Covariate cov : covariateList) {
|
||||
final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][read.getReadLength()];
|
||||
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), covariateList.size());
|
||||
for (int i = 0; i < covariateList.size(); i++) {
|
||||
final Covariate cov = covariateList.get(i);
|
||||
cov.initialize(RAC);
|
||||
CovariateValues covValues = cov.getValues(read);
|
||||
covariateKeys[i][EventType.BASE_SUBSTITUTION.index] = covValues.getMismatches();
|
||||
covariateKeys[i][EventType.BASE_INSERTION.index] = covValues.getInsertions();
|
||||
covariateKeys[i][EventType.BASE_DELETION.index] = covValues.getDeletions();
|
||||
i++;
|
||||
readCovariates.setCovariateIndex(i);
|
||||
cov.recordValues(read, readCovariates);
|
||||
}
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
for (EventType eventType : EventType.values()) {
|
||||
final long[] vals = readCovariates.getKeySet(i, eventType);
|
||||
for (int j = 0; j < vals.length; j++)
|
||||
covariateKeys[j][eventType.index][i] = vals[j];
|
||||
}
|
||||
}
|
||||
|
||||
List<Covariate> requiredCovariates = new LinkedList<Covariate>();
|
||||
List<Covariate> optionalCovariates = new LinkedList<Covariate>();
|
||||
|
||||
|
|
|
|||
|
|
@ -28,15 +28,17 @@ public class ContextCovariateUnitTest {
|
|||
public void testSimpleContexts() {
|
||||
GATKSAMRecord read = ReadUtils.createRandomRead(1000);
|
||||
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
||||
CovariateValues values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate);
|
||||
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
|
||||
verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate);
|
||||
verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||
verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||
}
|
||||
|
||||
public static void verifyCovariateArray(long[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||
public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||
for (int i = 0; i < values.length; i++)
|
||||
Assert.assertEquals(contextCovariate.formatKey(values[i]), expectedContext(read, i, contextSize));
|
||||
Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, contextSize));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,25 +30,26 @@ public class CycleCovariateUnitTest {
|
|||
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
|
||||
read.getReadGroup().setPlatform("illumina");
|
||||
|
||||
CovariateValues values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), 1, (short) 1);
|
||||
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
verifyCovariateArray(readCovariates.getMismatchesKeySet(), 1, (short) 1);
|
||||
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), readLength, -1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
verifyCovariateArray(readCovariates.getMismatchesKeySet(), readLength, -1);
|
||||
|
||||
read.setSecondOfPairFlag(true);
|
||||
values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), -readLength, 1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
verifyCovariateArray(readCovariates.getMismatchesKeySet(), -readLength, 1);
|
||||
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), -1, -1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1);
|
||||
}
|
||||
|
||||
private void verifyCovariateArray(long[] values, int init, int increment) {
|
||||
private void verifyCovariateArray(long[][] values, int init, int increment) {
|
||||
for (short i = 0; i < values.length; i++) {
|
||||
short actual = Short.decode(covariate.formatKey(values[i]));
|
||||
short actual = Short.decode(covariate.formatKey(values[i][0]));
|
||||
int expected = init + (increment * i);
|
||||
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
|
||||
Assert.assertEquals(actual, expected);
|
||||
|
|
|
|||
|
|
@ -40,14 +40,15 @@ public class ReadGroupCovariateUnitTest {
|
|||
private void runTest(GATKSAMReadGroupRecord rg, String expected) {
|
||||
GATKSAMRecord read = ReadUtils.createRandomRead(10);
|
||||
read.setReadGroup(rg);
|
||||
CovariateValues values = covariate.getValues(read);
|
||||
verifyCovariateArray(values.getMismatches(), expected);
|
||||
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected);
|
||||
|
||||
}
|
||||
|
||||
private void verifyCovariateArray(long[] values, String expected) {
|
||||
for (Long value : values) {
|
||||
String actual = covariate.formatKey(value);
|
||||
private void verifyCovariateArray(long[][] values, String expected) {
|
||||
for (long[] value : values) {
|
||||
String actual = covariate.formatKey(value[0]);
|
||||
Assert.assertEquals(actual, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue