The user can now set the maximum allowable cycle on the command line with --maximum_cycle_value. This value is now enforced in the Cycle covariate, and a User Error is thrown (with a helpful error message) if a cycle exceeds this maximum. Added unit tests to cover this new functionality.
This commit is contained in:
parent
ff87642a91
commit
72e2d569c5
|
|
@ -102,13 +102,10 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false)
|
||||
public boolean DO_NOT_USE_STANDARD_COVARIATES = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
/**
|
||||
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
|
||||
*/
|
||||
@Hidden
|
||||
@Advanced
|
||||
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
||||
public boolean RUN_WITHOUT_DBSNP = false;
|
||||
|
||||
|
|
@ -139,6 +136,13 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
|
||||
public int INDELS_CONTEXT_SIZE = 3;
|
||||
|
||||
/**
|
||||
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
|
||||
* This argument is ignored if the Cycle covariate is not used.
|
||||
*/
|
||||
@Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "the maximum cycle value permitted for the Cycle covariate", required = false)
|
||||
public int MAXIMUM_CYCLE_VALUE = 500;
|
||||
|
||||
/**
|
||||
* The default base quality to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read with this default value. A negative value turns it off (default is off)
|
||||
*/
|
||||
|
|
@ -176,9 +180,15 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
|
||||
public String BINARY_TAG_NAME = null;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String DEFAULT_PLATFORM = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ import java.util.EnumSet;
|
|||
|
||||
public class CycleCovariate implements StandardCovariate {
|
||||
|
||||
private static final int MAXIMUM_CYCLE_VALUE = 1000;
|
||||
private int MAXIMUM_CYCLE_VALUE;
|
||||
private static final int CUSHION_FOR_INDELS = 4;
|
||||
private String default_platform = null;
|
||||
|
||||
|
|
@ -59,6 +59,8 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
this.MAXIMUM_CYCLE_VALUE = RAC.MAXIMUM_CYCLE_VALUE;
|
||||
|
||||
if (RAC.DEFAULT_PLATFORM != null && !NGSPlatform.isKnown(RAC.DEFAULT_PLATFORM))
|
||||
throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform.");
|
||||
|
||||
|
|
@ -88,6 +90,9 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1;
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
if ( cycle > MAXIMUM_CYCLE_VALUE )
|
||||
throw new UserException("The maximum allowed value for the cycle is " + MAXIMUM_CYCLE_VALUE + ", but a larger cycle was detected in read " + read.getReadName() + ". Please use the --maximum_cycle_value argument to increase this value (at the expense of requiring more memory to run)");
|
||||
|
||||
final int substitutionKey = keyFromCycle(cycle);
|
||||
final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey;
|
||||
values.addCovariate(substitutionKey, indelKey, indelKey, i);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -53,9 +54,31 @@ public class CycleCovariateUnitTest {
|
|||
for (short i = 0; i < values.length; i++) {
|
||||
short actual = Short.decode(covariate.formatKey(values[i][0]));
|
||||
int expected = init + (increment * i);
|
||||
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
|
||||
Assert.assertEquals(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true, expectedExceptions={UserException.class})
|
||||
public void testMoreThanMaxCycleFails() {
|
||||
int readLength = RAC.MAXIMUM_CYCLE_VALUE + 1;
|
||||
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
|
||||
read.getReadGroup().setPlatform("illumina");
|
||||
|
||||
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMaxCyclePasses() {
|
||||
int readLength = RAC.MAXIMUM_CYCLE_VALUE;
|
||||
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
|
||||
read.getReadGroup().setPlatform("illumina");
|
||||
|
||||
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
|
||||
covariate.recordValues(read, readCovariates);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue