Bug fixes for cycle and context covariates

This commit is contained in:
Eric Banks 2012-06-14 13:01:14 -04:00
parent 5c3c6cbc40
commit de5508fcea
2 changed files with 18 additions and 16 deletions

View File

@@ -111,7 +111,7 @@ public class ContextCovariate implements StandardCovariate {
@Override @Override
public int numberOfBits() { public int numberOfBits() {
return Integer.bitCount(1); return Integer.bitCount(Integer.MAX_VALUE);
} }
/** /**

View File

@@ -59,17 +59,18 @@ public class CycleCovariate implements StandardCovariate {
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
@Override @Override
public CovariateValues getValues(final GATKSAMRecord read) { public CovariateValues getValues(final GATKSAMRecord read) {
long[] cycles = new long[read.getReadLength()]; final int readLength = read.getReadLength();
final long[] cycles = new long[readLength];
final NGSPlatform ngsPlatform = read.getNGSPlatform(); final NGSPlatform ngsPlatform = read.getNGSPlatform();
// Discrete cycle platforms // Discrete cycle platforms
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) { if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
final short readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? (short) -1 : 1; final int readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? -1 : 1;
final short increment; final int increment;
short cycle; int cycle;
if (read.getReadNegativeStrandFlag()) { if (read.getReadNegativeStrandFlag()) {
cycle = (short) (read.getReadLength() * readOrderFactor); cycle = readLength * readOrderFactor;
increment = (short) (-1 * readOrderFactor); increment = -1 * readOrderFactor;
} }
else { else {
cycle = readOrderFactor; cycle = readOrderFactor;
@@ -77,17 +78,18 @@ public class CycleCovariate implements StandardCovariate {
} }
final int CUSHION = 4; final int CUSHION = 4;
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1; final int MAX_CYCLE = readLength - CUSHION - 1;
for (int i = 0; i < MAX_CYCLE; i++) { for (int i = 0; i < MAX_CYCLE; i++) {
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? -1 : keyFromCycle(cycle); cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
cycle += increment; cycle += increment;
} }
for (int i = MAX_CYCLE; i < readLength; i++)
cycles[i] = -1L;
} }
// Flow cycle platforms // Flow cycle platforms
else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) { else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) {
final int readLength = read.getReadLength();
final byte[] bases = read.getReadBases(); final byte[] bases = read.getReadBases();
// Differentiate between first and second of pair. // Differentiate between first and second of pair.
@@ -98,7 +100,7 @@ public class CycleCovariate implements StandardCovariate {
// the current sequential model would consider the effects independently instead of jointly. // the current sequential model would consider the effects independently instead of jointly.
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag(); final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
short cycle = multiplyByNegative1 ? (short) -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms. int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one // For example, AAAAAAA was probably read in two flow cycles but here we count it as one
@@ -178,7 +180,7 @@ public class CycleCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration // Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override @Override
public final Object getValue(final String str) { public final Object getValue(final String str) {
return Short.parseShort(str); return Integer.parseInt(str);
} }
@Override @Override
@@ -191,16 +193,16 @@ public class CycleCovariate implements StandardCovariate {
@Override @Override
public long longFromKey(final Object key) { public long longFromKey(final Object key) {
return (key instanceof String) ? keyFromCycle(Short.parseShort((String) key)) : keyFromCycle((Short) key); return (key instanceof String) ? keyFromCycle(Integer.parseInt((String) key)) : keyFromCycle((Integer) key);
} }
@Override @Override
public int numberOfBits() { public int numberOfBits() {
return BQSRKeyManager.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative return Integer.bitCount(Integer.MAX_VALUE);
} }
private static long keyFromCycle(final short cycle) { private static long keyFromCycle(final int cycle) {
// no negative values because // no negative values because values must fit into the first few bits of the long
long result = Math.abs(cycle); long result = Math.abs(cycle);
result = result << 1; // shift so we can add the "sign" bit result = result << 1; // shift so we can add the "sign" bit
if ( cycle < 0 ) if ( cycle < 0 )