Ryan confirmed that we don't need separate arguments to control the context size for insertions and deletions, which allows us to cut down the expensive context calculations.

This commit is contained in:
Eric Banks 2012-06-15 09:28:56 -04:00
parent 61fcbcb190
commit c54e84e739
6 changed files with 20 additions and 34 deletions

View File

@ -41,8 +41,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class ContextCovariate implements StandardCovariate {
private int mismatchesContextSize;
private int insertionsContextSize;
private int deletionsContextSize;
private int indelsContextSize;
private byte LOW_QUAL_TAIL;
@ -50,19 +49,16 @@ public class ContextCovariate implements StandardCovariate {
/**
 * Copies the context sizes and the low-quality tail value out of the argument collection into
 * this covariate's fields, and rejects context sizes that are out of range.
 *
 * NOTE(review): the insertions/deletions statements and the indels statements below look like
 * the before and after sides of the same change shown together -- confirm which side is live
 * before relying on either.
 *
 * @param RAC the recalibration argument collection the context sizes and LOW_QUAL_TAIL are read from
 * @throws UserException.BadArgumentValue if a context size is larger than MAX_DNA_CONTEXT
 * @throws UserException if a context size is zero or negative
 */
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE;
deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE;
indelsContextSize = RAC.INDELS_CONTEXT_SIZE;
// Upper bound: every configured context size must be at most MAX_DNA_CONTEXT.
if (mismatchesContextSize > MAX_DNA_CONTEXT)
throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize));
if (insertionsContextSize > MAX_DNA_CONTEXT)
throw new UserException.BadArgumentValue("insertions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, insertionsContextSize));
if (deletionsContextSize > MAX_DNA_CONTEXT)
throw new UserException.BadArgumentValue("deletions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, deletionsContextSize));
if (indelsContextSize > MAX_DNA_CONTEXT)
throw new UserException.BadArgumentValue("indels_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, indelsContextSize));
LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL;
// Lower bound: zero or negative sizes are rejected; the message tells the user to disable the covariate instead.
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
if (mismatchesContextSize <= 0 || indelsContextSize <= 0)
throw new UserException(String.format("Context size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Indels: %d", mismatchesContextSize, indelsContextSize));
}
@Override
@ -77,7 +73,8 @@ public class ContextCovariate implements StandardCovariate {
final int readLength = clippedRead.getReadLength();
for (int i = 0; i < readLength; i++) {
values.addCovariate(contextWith(bases, i, mismatchesContextSize), contextWith(bases, i, insertionsContextSize), contextWith(bases, i, deletionsContextSize), (negativeStrand ? readLength - i - 1 : i));
final long indelKey = contextWith(bases, i, indelsContextSize);
values.addCovariate(contextWith(bases, i, mismatchesContextSize), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i));
}
}

View File

@ -114,16 +114,10 @@ public class RecalibrationArgumentCollection {
public int MISMATCHES_CONTEXT_SIZE = 2;
/**
* The context covariate will use a context of this size to calculate its covariate value for base insertions
* The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions
*/
@Argument(fullName = "insertions_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions", required = false)
public int INSERTIONS_CONTEXT_SIZE = 8;
/**
* The context covariate will use a context of this size to calculate its covariate value for base deletions
*/
@Argument(fullName = "deletions_context_size", shortName = "dcs", doc = "size of the k-mer context to be used for base deletions", required = false)
public int DELETIONS_CONTEXT_SIZE = 8;
@Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
public int INDELS_CONTEXT_SIZE = 8;
/**
* A default base quality to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read. A negative value turns it off (default is off)
@ -188,10 +182,8 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.addRowID("mismatches_context_size", true);
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("insertions_context_size", true);
argumentsTable.set("insertions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_CONTEXT_SIZE);
argumentsTable.addRowID("deletions_context_size", true);
argumentsTable.set("deletions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_CONTEXT_SIZE);
argumentsTable.addRowID("indels_context_size", true);
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true);

View File

@ -266,11 +266,8 @@ public class RecalibrationReport {
else if (argument.equals("mismatches_context_size"))
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (argument.equals("insertions_context_size"))
RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (argument.equals("deletions_context_size"))
RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (argument.equals("indels_context_size"))
RAC.INDELS_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (argument.equals("mismatches_default_quality"))
RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value);

View File

@ -32,8 +32,8 @@ public class ContextCovariateUnitTest {
covariate.recordValues(read, readCovariates);
verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate);
verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate);
verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate);
verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate);
verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate);
}
public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {

View File

@ -61,8 +61,8 @@ public class ReadCovariatesUnitTest {
// check context
Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INSERTIONS_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.DELETIONS_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
// check cycle
Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1));

View File

@ -76,7 +76,7 @@ public class RecalibrationReportUnitTest {
read.setBaseQualities(readQuals);
final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INSERTIONS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
int nKeys = 0; // keep track of how many keys were produced
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
for (int offset = 0; offset < length; offset++) {