No more extraneous array creation in BQSR covariate classes; now covariates push their data directly to the ReadCovariates class as it's calculated (no more going through CovariateValues.java)
This commit is contained in:
parent
0398ae9695
commit
4895fe2289
|
|
@ -32,8 +32,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
|
|
@ -54,20 +52,21 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
|
mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
|
||||||
insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE;
|
insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE;
|
||||||
deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE;
|
deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE;
|
||||||
|
if (mismatchesContextSize > MAX_DNA_CONTEXT)
|
||||||
|
throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize));
|
||||||
|
if (insertionsContextSize > MAX_DNA_CONTEXT)
|
||||||
|
throw new UserException.BadArgumentValue("insertions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, insertionsContextSize));
|
||||||
|
if (deletionsContextSize > MAX_DNA_CONTEXT)
|
||||||
|
throw new UserException.BadArgumentValue("deletions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, deletionsContextSize));
|
||||||
|
|
||||||
LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL;
|
LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL;
|
||||||
|
|
||||||
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
||||||
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||||
final int l = read.getReadLength();
|
|
||||||
final long[] mismatches = new long[l];
|
|
||||||
final long[] insertions = new long[l];
|
|
||||||
final long[] deletions = new long[l];
|
|
||||||
|
|
||||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||||
|
|
||||||
|
|
@ -76,18 +75,10 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
if (negativeStrand)
|
if (negativeStrand)
|
||||||
bases = BaseUtils.simpleReverseComplement(bases);
|
bases = BaseUtils.simpleReverseComplement(bases);
|
||||||
|
|
||||||
for (int i = 0; i < clippedRead.getReadLength(); i++) {
|
final int readLength = clippedRead.getReadLength();
|
||||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
|
for (int i = 0; i < readLength; i++) {
|
||||||
insertions[i] = contextWith(bases, i, insertionsContextSize);
|
values.addCovariate(contextWith(bases, i, mismatchesContextSize), contextWith(bases, i, insertionsContextSize), contextWith(bases, i, deletionsContextSize), (negativeStrand ? readLength - i - 1 : i));
|
||||||
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (negativeStrand) {
|
|
||||||
reverse(mismatches);
|
|
||||||
reverse(insertions);
|
|
||||||
reverse(deletions);
|
|
||||||
}
|
|
||||||
return new CovariateValues(mismatches, insertions, deletions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
|
@ -106,7 +97,7 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long longFromKey(Object key) {
|
public long longFromKey(Object key) {
|
||||||
return longFrom((String) key);
|
return keyFromContext((String) key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -123,35 +114,17 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
* @return the key representing the context
|
* @return the key representing the context
|
||||||
*/
|
*/
|
||||||
private long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
private long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||||
long result = -1;
|
|
||||||
final int start = offset - contextSize + 1;
|
final int start = offset - contextSize + 1;
|
||||||
if (start >= 0) {
|
final long result;
|
||||||
final byte[] context = Arrays.copyOfRange(bases, start, offset + 1);
|
if (start >= 0)
|
||||||
if (!BaseUtils.containsBase(context, BaseUtils.N))
|
result = keyFromContext(bases, start, offset + 1);
|
||||||
result = keyFromContext(context);
|
else
|
||||||
}
|
result = -1L;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public static long keyFromContext(final String dna) {
|
||||||
* Reverses the given array in place.
|
return keyFromContext(dna.getBytes(), 0, dna.length());
|
||||||
*
|
|
||||||
* @param array any array
|
|
||||||
*/
|
|
||||||
private static void reverse(final long[] array) {
|
|
||||||
final int arrayLength = array.length;
|
|
||||||
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
|
||||||
final long temp = array[l];
|
|
||||||
array[l] = array[r];
|
|
||||||
array[r] = temp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
|
||||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
|
||||||
|
|
||||||
public static long longFrom(final String dna) {
|
|
||||||
return keyFromContext(dna.getBytes());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -172,40 +145,39 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
* @param dna the dna sequence
|
* @param dna the dna sequence
|
||||||
* @return the key representing the dna sequence
|
* @return the key representing the dna sequence
|
||||||
*/
|
*/
|
||||||
public static long keyFromContext(final byte[] dna) {
|
public static long keyFromContext(final byte[] dna, final int start, final int end) {
|
||||||
if (dna.length > MAX_DNA_CONTEXT)
|
final long preContext = combinationsPerLength[end - start - 1]; // the sum of all combinations that preceded the length of the dna string
|
||||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
|
|
||||||
|
|
||||||
final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string
|
|
||||||
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
||||||
for (final byte base : dna) {
|
for (int i = start; i < end; i++) {
|
||||||
baseTen = baseTen << 2; // multiply by 4
|
baseTen = (baseTen << 2); // multiply by 4
|
||||||
baseTen += (long)BaseUtils.simpleBaseToBaseIndex(base);
|
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||||
|
if (baseIndex == -1) // ignore non-ACGT bases
|
||||||
|
return -1L;
|
||||||
|
baseTen += (long)baseIndex;
|
||||||
}
|
}
|
||||||
return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||||
|
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||||
|
static {
|
||||||
|
for (int i = 0; i < MAX_DNA_CONTEXT + 1; i++)
|
||||||
|
computeCombinationsFor(i);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||||
*
|
*
|
||||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||||
*
|
*
|
||||||
* @param length the length of the DNA context
|
* @param length the length of the DNA context
|
||||||
* @return the sum of all combinations leading up to this context length.
|
|
||||||
*/
|
*/
|
||||||
private static long combinationsFor(int length) {
|
private static void computeCombinationsFor(final int length) {
|
||||||
if (length > MAX_DNA_CONTEXT)
|
|
||||||
throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));
|
|
||||||
|
|
||||||
// only calculate the number of combinations if the table hasn't already cached the value
|
|
||||||
if (length > 0 && combinationsPerLength[length] == 0) {
|
|
||||||
long combinations = 0L;
|
long combinations = 0L;
|
||||||
for (int i = 1; i <= length; i++)
|
for (int i = 1; i <= length; i++)
|
||||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||||
combinationsPerLength[length] = combinations;
|
combinationsPerLength[length] = combinations;
|
||||||
}
|
}
|
||||||
return combinationsPerLength[length];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a key into the dna string representation.
|
* Converts a key into the dna string representation.
|
||||||
|
|
@ -232,7 +204,7 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||||
|
|
||||||
final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||||
key -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
key -= combinationsPerLength[length - 1]; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||||
|
|
||||||
StringBuilder dna = new StringBuilder();
|
StringBuilder dna = new StringBuilder();
|
||||||
while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||||
|
|
@ -260,10 +232,10 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
*/
|
*/
|
||||||
private static int contextLengthFor(final long number) {
|
private static int contextLengthFor(final long number) {
|
||||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||||
long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
|
long combinations = combinationsPerLength[length]; // the next context (we advance it so we know which one was preceding it).
|
||||||
while (combinations <= number) { // find the length of the dna string (length)
|
while (combinations <= number) { // find the length of the dna string (length)
|
||||||
length++;
|
length++;
|
||||||
combinations = combinationsFor(length); // calculate the next context
|
combinations = combinationsPerLength[length]; // calculate the next context
|
||||||
}
|
}
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -49,9 +49,9 @@ public interface Covariate {
|
||||||
* Calculates covariate values for all positions in the read.
|
* Calculates covariate values for all positions in the read.
|
||||||
*
|
*
|
||||||
* @param read the read to calculate the covariates on.
|
* @param read the read to calculate the covariates on.
|
||||||
* @return all the covariate values for every base in the read.
|
* @param values the object to record the covariate values for every base in the read.
|
||||||
*/
|
*/
|
||||||
public CovariateValues getValues(final GATKSAMRecord read);
|
public void recordValues(final GATKSAMRecord read, final ReadCovariates values);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used to get the covariate's value from input csv file during on-the-fly recalibration
|
* Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
|
|
||||||
|
|
@ -1,37 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An object to hold the different covariate values for all bases in the read.
|
|
||||||
*
|
|
||||||
* Currently we have three different covariates for each read:
|
|
||||||
* - Mismatch
|
|
||||||
* - Insertion
|
|
||||||
* - Deletion
|
|
||||||
*
|
|
||||||
* @author Mauricio Carneiro
|
|
||||||
* @since 2/8/12
|
|
||||||
*/
|
|
||||||
public class CovariateValues {
|
|
||||||
private final long[] mismatches;
|
|
||||||
private final long[] insertions;
|
|
||||||
private final long[] deletions;
|
|
||||||
|
|
||||||
public CovariateValues(final long[] mismatch, final long[] insertion, final long[] deletion) {
|
|
||||||
this.mismatches = mismatch;
|
|
||||||
this.insertions = insertion;
|
|
||||||
this.deletions = deletion;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long[] getMismatches() {
|
|
||||||
return mismatches;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long[] getInsertions() {
|
|
||||||
return insertions;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long[] getDeletions() {
|
|
||||||
return deletions;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -58,9 +58,8 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
@Override
|
@Override
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||||
final int readLength = read.getReadLength();
|
final int readLength = read.getReadLength();
|
||||||
final long[] cycles = new long[readLength];
|
|
||||||
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
||||||
|
|
||||||
// Discrete cycle platforms
|
// Discrete cycle platforms
|
||||||
|
|
@ -79,12 +78,11 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
final int CUSHION = 4;
|
final int CUSHION = 4;
|
||||||
final int MAX_CYCLE = readLength - CUSHION - 1;
|
final int MAX_CYCLE = readLength - CUSHION - 1;
|
||||||
for (int i = 0; i < MAX_CYCLE; i++) {
|
for (int i = 0; i < readLength; i++) {
|
||||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
|
final long key = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, i);
|
||||||
cycle += increment;
|
cycle += increment;
|
||||||
}
|
}
|
||||||
for (int i = MAX_CYCLE; i < readLength; i++)
|
|
||||||
cycles[i] = -1L;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flow cycle platforms
|
// Flow cycle platforms
|
||||||
|
|
@ -108,19 +106,23 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
int iii = 0;
|
int iii = 0;
|
||||||
while (iii < readLength) {
|
while (iii < readLength) {
|
||||||
while (iii < readLength && bases[iii] == (byte) 'T') {
|
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii++;
|
iii++;
|
||||||
}
|
}
|
||||||
while (iii < readLength && bases[iii] == (byte) 'A') {
|
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii++;
|
iii++;
|
||||||
}
|
}
|
||||||
while (iii < readLength && bases[iii] == (byte) 'C') {
|
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii++;
|
iii++;
|
||||||
}
|
}
|
||||||
while (iii < readLength && bases[iii] == (byte) 'G') {
|
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii++;
|
iii++;
|
||||||
}
|
}
|
||||||
if (iii < readLength) {
|
if (iii < readLength) {
|
||||||
|
|
@ -130,7 +132,8 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
cycle++;
|
cycle++;
|
||||||
}
|
}
|
||||||
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii++;
|
iii++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -140,19 +143,23 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
int iii = readLength - 1;
|
int iii = readLength - 1;
|
||||||
while (iii >= 0) {
|
while (iii >= 0) {
|
||||||
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii--;
|
iii--;
|
||||||
}
|
}
|
||||||
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii--;
|
iii--;
|
||||||
}
|
}
|
||||||
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii--;
|
iii--;
|
||||||
}
|
}
|
||||||
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii--;
|
iii--;
|
||||||
}
|
}
|
||||||
if (iii >= 0) {
|
if (iii >= 0) {
|
||||||
|
|
@ -162,7 +169,8 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
cycle++;
|
cycle++;
|
||||||
}
|
}
|
||||||
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||||
cycles[iii] = keyFromCycle(cycle);
|
final long key = keyFromCycle(cycle);
|
||||||
|
values.addCovariate(key, key, key, iii);
|
||||||
iii--;
|
iii--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -173,8 +181,6 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
else {
|
else {
|
||||||
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CovariateValues(cycles, cycles, cycles);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
|
|
||||||
|
|
@ -40,28 +40,17 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||||
int readLength = read.getReadLength();
|
final byte[] baseQualities = read.getBaseQualities();
|
||||||
|
final byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||||
long[] mismatches = new long[readLength];
|
final byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||||
long[] insertions = new long[readLength];
|
|
||||||
long[] deletions = new long[readLength];
|
|
||||||
|
|
||||||
byte[] baseQualities = read.getBaseQualities();
|
|
||||||
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
|
||||||
byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
|
||||||
|
|
||||||
for (int i = 0; i < baseQualities.length; i++) {
|
for (int i = 0; i < baseQualities.length; i++) {
|
||||||
mismatches[i] = (long)baseQualities[i];
|
values.addCovariate((long)baseQualities[i], (long)baseInsertionQualities[i], (long)baseDeletionQualities[i], i);
|
||||||
insertions[i] = (long)baseInsertionQualities[i];
|
|
||||||
deletions[i] = (long)baseDeletionQualities[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CovariateValues(mismatches, insertions, deletions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
|
|
||||||
|
|
@ -15,24 +15,22 @@ public class ReadCovariates {
|
||||||
private final long[][] insertionsKeySet;
|
private final long[][] insertionsKeySet;
|
||||||
private final long[][] deletionsKeySet;
|
private final long[][] deletionsKeySet;
|
||||||
|
|
||||||
private int nextCovariateIndex;
|
private int currentCovariateIndex = 0;
|
||||||
|
|
||||||
public ReadCovariates(int readLength, int numberOfCovariates) {
|
public ReadCovariates(int readLength, int numberOfCovariates) {
|
||||||
this.mismatchesKeySet = new long[readLength][numberOfCovariates];
|
this.mismatchesKeySet = new long[readLength][numberOfCovariates];
|
||||||
this.insertionsKeySet = new long[readLength][numberOfCovariates];
|
this.insertionsKeySet = new long[readLength][numberOfCovariates];
|
||||||
this.deletionsKeySet = new long[readLength][numberOfCovariates];
|
this.deletionsKeySet = new long[readLength][numberOfCovariates];
|
||||||
this.nextCovariateIndex = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset() {
|
public void setCovariateIndex(final int index) {
|
||||||
nextCovariateIndex = 0;
|
currentCovariateIndex = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addCovariate(CovariateValues covariate) {
|
public void addCovariate(final long mismatch, final long insertion, final long deletion, final int readOffset) {
|
||||||
transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
|
mismatchesKeySet[readOffset][currentCovariateIndex] = mismatch;
|
||||||
transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
|
insertionsKeySet[readOffset][currentCovariateIndex] = insertion;
|
||||||
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
|
deletionsKeySet[readOffset][currentCovariateIndex] = deletion;
|
||||||
nextCovariateIndex++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public long[] getKeySet(final int readPosition, final EventType errorModel) {
|
public long[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||||
|
|
@ -60,11 +58,6 @@ public class ReadCovariates {
|
||||||
return deletionsKeySet[readPosition];
|
return deletionsKeySet[readPosition];
|
||||||
}
|
}
|
||||||
|
|
||||||
private void transposeCovariateValues(final long[][] keySet, final long[] covariateValues) {
|
|
||||||
for (int i = 0; i < covariateValues.length; i++)
|
|
||||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Testing routines
|
* Testing routines
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -50,13 +49,13 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||||
final int l = read.getReadLength();
|
|
||||||
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
||||||
final long key = keyForReadGroup(readGroupId);
|
final long key = keyForReadGroup(readGroupId);
|
||||||
long[] readGroups = new long[l];
|
|
||||||
Arrays.fill(readGroups, key);
|
final int l = read.getReadLength();
|
||||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
for (int i = 0; i < l; i++)
|
||||||
|
values.addCovariate(key, key, key, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -631,8 +631,10 @@ public class RecalDataManager {
|
||||||
*/
|
*/
|
||||||
public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) {
|
public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) {
|
||||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||||
for (final Covariate covariate : requestedCovariates)
|
for (int i = 0; i < requestedCovariates.length; i++) {
|
||||||
readCovariates.addCovariate(covariate.getValues(read));
|
readCovariates.setCovariateIndex(i);
|
||||||
|
requestedCovariates[i].recordValues(read, readCovariates);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.utils.recalibration;
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -92,7 +93,6 @@ public class BaseRecalibration {
|
||||||
* @param read the read to recalibrate
|
* @param read the read to recalibrate
|
||||||
*/
|
*/
|
||||||
public void recalibrateRead(final GATKSAMRecord read) {
|
public void recalibrateRead(final GATKSAMRecord read) {
|
||||||
readCovariates.reset();
|
|
||||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||||
final byte[] quals = read.getBaseQualities(errorModel);
|
final byte[] quals = read.getBaseQualities(errorModel);
|
||||||
|
|
@ -181,7 +181,7 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
|
|
||||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||||
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||||
|
|
||||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -80,16 +80,22 @@ public class BQSRKeyManagerUnitTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
|
private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
|
||||||
final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][];
|
final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][read.getReadLength()];
|
||||||
int i = 0;
|
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), covariateList.size());
|
||||||
for (Covariate cov : covariateList) {
|
for (int i = 0; i < covariateList.size(); i++) {
|
||||||
|
final Covariate cov = covariateList.get(i);
|
||||||
cov.initialize(RAC);
|
cov.initialize(RAC);
|
||||||
CovariateValues covValues = cov.getValues(read);
|
readCovariates.setCovariateIndex(i);
|
||||||
covariateKeys[i][EventType.BASE_SUBSTITUTION.index] = covValues.getMismatches();
|
cov.recordValues(read, readCovariates);
|
||||||
covariateKeys[i][EventType.BASE_INSERTION.index] = covValues.getInsertions();
|
|
||||||
covariateKeys[i][EventType.BASE_DELETION.index] = covValues.getDeletions();
|
|
||||||
i++;
|
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
|
for (EventType eventType : EventType.values()) {
|
||||||
|
final long[] vals = readCovariates.getKeySet(i, eventType);
|
||||||
|
for (int j = 0; j < vals.length; j++)
|
||||||
|
covariateKeys[j][eventType.index][i] = vals[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
List<Covariate> requiredCovariates = new LinkedList<Covariate>();
|
List<Covariate> requiredCovariates = new LinkedList<Covariate>();
|
||||||
List<Covariate> optionalCovariates = new LinkedList<Covariate>();
|
List<Covariate> optionalCovariates = new LinkedList<Covariate>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,15 +28,17 @@ public class ContextCovariateUnitTest {
|
||||||
public void testSimpleContexts() {
|
public void testSimpleContexts() {
|
||||||
GATKSAMRecord read = ReadUtils.createRandomRead(1000);
|
GATKSAMRecord read = ReadUtils.createRandomRead(1000);
|
||||||
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
||||||
CovariateValues values = covariate.getValues(read);
|
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||||
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate);
|
covariate.recordValues(read, readCovariates);
|
||||||
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate);
|
|
||||||
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate);
|
verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate);
|
||||||
|
verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||||
|
verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void verifyCovariateArray(long[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||||
for (int i = 0; i < values.length; i++)
|
for (int i = 0; i < values.length; i++)
|
||||||
Assert.assertEquals(contextCovariate.formatKey(values[i]), expectedContext(read, i, contextSize));
|
Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, contextSize));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,25 +30,26 @@ public class CycleCovariateUnitTest {
|
||||||
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
|
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
|
||||||
read.getReadGroup().setPlatform("illumina");
|
read.getReadGroup().setPlatform("illumina");
|
||||||
|
|
||||||
CovariateValues values = covariate.getValues(read);
|
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||||
verifyCovariateArray(values.getMismatches(), 1, (short) 1);
|
covariate.recordValues(read, readCovariates);
|
||||||
|
verifyCovariateArray(readCovariates.getMismatchesKeySet(), 1, (short) 1);
|
||||||
|
|
||||||
read.setReadNegativeStrandFlag(true);
|
read.setReadNegativeStrandFlag(true);
|
||||||
values = covariate.getValues(read);
|
covariate.recordValues(read, readCovariates);
|
||||||
verifyCovariateArray(values.getMismatches(), readLength, -1);
|
verifyCovariateArray(readCovariates.getMismatchesKeySet(), readLength, -1);
|
||||||
|
|
||||||
read.setSecondOfPairFlag(true);
|
read.setSecondOfPairFlag(true);
|
||||||
values = covariate.getValues(read);
|
covariate.recordValues(read, readCovariates);
|
||||||
verifyCovariateArray(values.getMismatches(), -readLength, 1);
|
verifyCovariateArray(readCovariates.getMismatchesKeySet(), -readLength, 1);
|
||||||
|
|
||||||
read.setReadNegativeStrandFlag(false);
|
read.setReadNegativeStrandFlag(false);
|
||||||
values = covariate.getValues(read);
|
covariate.recordValues(read, readCovariates);
|
||||||
verifyCovariateArray(values.getMismatches(), -1, -1);
|
verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyCovariateArray(long[] values, int init, int increment) {
|
private void verifyCovariateArray(long[][] values, int init, int increment) {
|
||||||
for (short i = 0; i < values.length; i++) {
|
for (short i = 0; i < values.length; i++) {
|
||||||
short actual = Short.decode(covariate.formatKey(values[i]));
|
short actual = Short.decode(covariate.formatKey(values[i][0]));
|
||||||
int expected = init + (increment * i);
|
int expected = init + (increment * i);
|
||||||
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
|
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
|
||||||
Assert.assertEquals(actual, expected);
|
Assert.assertEquals(actual, expected);
|
||||||
|
|
|
||||||
|
|
@ -40,14 +40,15 @@ public class ReadGroupCovariateUnitTest {
|
||||||
private void runTest(GATKSAMReadGroupRecord rg, String expected) {
|
private void runTest(GATKSAMReadGroupRecord rg, String expected) {
|
||||||
GATKSAMRecord read = ReadUtils.createRandomRead(10);
|
GATKSAMRecord read = ReadUtils.createRandomRead(10);
|
||||||
read.setReadGroup(rg);
|
read.setReadGroup(rg);
|
||||||
CovariateValues values = covariate.getValues(read);
|
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||||
verifyCovariateArray(values.getMismatches(), expected);
|
covariate.recordValues(read, readCovariates);
|
||||||
|
verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyCovariateArray(long[] values, String expected) {
|
private void verifyCovariateArray(long[][] values, String expected) {
|
||||||
for (Long value : values) {
|
for (long[] value : values) {
|
||||||
String actual = covariate.formatKey(value);
|
String actual = covariate.formatKey(value[0]);
|
||||||
Assert.assertEquals(actual, expected);
|
Assert.assertEquals(actual, expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue