The user can now set the maximum allowable cycle on the command-line with --maximum_cycle_value. This value is (now) enforced in the Cycle covariate and a User Error is thrown if the maximum value is passed (with a helpful error message). Added unit tests to cover this new functionality.

This commit is contained in:
Eric Banks 2012-11-20 22:41:57 -05:00
parent ff87642a91
commit 72e2d569c5
3 changed files with 44 additions and 6 deletions

View File

@ -102,13 +102,10 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false)
public boolean DO_NOT_USE_STANDARD_COVARIATES = false;
/////////////////////////////
// Debugging-only Arguments
/////////////////////////////
/**
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
*/
@Hidden
@Advanced
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
public boolean RUN_WITHOUT_DBSNP = false;
@ -139,6 +136,13 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
public int INDELS_CONTEXT_SIZE = 3;
/**
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
* This argument is ignored if the Cycle covariate is not used.
*/
@Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "the maximum cycle value permitted for the Cycle covariate", required = false)
public int MAXIMUM_CYCLE_VALUE = 500;
/**
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
*/
@ -176,9 +180,15 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
public String BINARY_TAG_NAME = null;
/////////////////////////////
// Debugging-only Arguments
/////////////////////////////
@Hidden
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = null;
@Hidden
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;

View File

@ -49,7 +49,7 @@ import java.util.EnumSet;
public class CycleCovariate implements StandardCovariate {
private static final int MAXIMUM_CYCLE_VALUE = 1000;
private int MAXIMUM_CYCLE_VALUE;
private static final int CUSHION_FOR_INDELS = 4;
private String default_platform = null;
@ -59,6 +59,8 @@ public class CycleCovariate implements StandardCovariate {
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
this.MAXIMUM_CYCLE_VALUE = RAC.MAXIMUM_CYCLE_VALUE;
if (RAC.DEFAULT_PLATFORM != null && !NGSPlatform.isKnown(RAC.DEFAULT_PLATFORM))
throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform.");
@ -88,6 +90,9 @@ public class CycleCovariate implements StandardCovariate {
final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1;
for (int i = 0; i < readLength; i++) {
if ( cycle > MAXIMUM_CYCLE_VALUE )
throw new UserException("The maximum allowed value for the cycle is " + MAXIMUM_CYCLE_VALUE + ", but a larger cycle was detected in read " + read.getReadName() + ". Please use the --maximum_cycle_value argument to increase this value (at the expense of requiring more memory to run)");
final int substitutionKey = keyFromCycle(cycle);
final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey;
values.addCovariate(substitutionKey, indelKey, indelKey, i);

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -53,9 +54,31 @@ public class CycleCovariateUnitTest {
for (short i = 0; i < values.length; i++) {
short actual = Short.decode(covariate.formatKey(values[i][0]));
int expected = init + (increment * i);
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
Assert.assertEquals(actual, expected);
}
}
@Test(enabled = true, expectedExceptions={UserException.class})
public void testMoreThanMaxCycleFails() {
int readLength = RAC.MAXIMUM_CYCLE_VALUE + 1;
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
read.setReadPairedFlag(true);
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
read.getReadGroup().setPlatform("illumina");
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
covariate.recordValues(read, readCovariates);
}
@Test(enabled = true)
public void testMaxCyclePasses() {
int readLength = RAC.MAXIMUM_CYCLE_VALUE;
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
read.setReadPairedFlag(true);
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
read.getReadGroup().setPlatform("illumina");
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
covariate.recordValues(read, readCovariates);
}
}