Improved BQSR plotting and some new parameters

* Refactored CycleCovariate to be a fragment covariate instead of a per-read covariate
   * Refactored the CycleCovariateUnitTest to test the pairing information
   * Updated BQSR Integration tests accordingly
   * Made the quantization levels parameter visible (it is no longer hidden)
   * Added hidden option to keep intermediate plotting files for debugging purposes (by default they are deleted automatically)
   * Added hidden option to skip automatic plot generation (important for scatter/gathering)
This commit is contained in:
Mauricio Carneiro 2012-04-18 13:02:41 -04:00
parent 4999ae87ad
commit 68d0211fa1
4 changed files with 38 additions and 10 deletions

View File

@ -66,18 +66,18 @@ public class CycleCovariate implements StandardCovariate {
// Discrete cycle platforms
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
final short init;
final short readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? (short) -1 : 1;
final short increment;
if (!read.getReadNegativeStrandFlag()) {
init = 1;
increment = 1;
short cycle;
if (read.getReadNegativeStrandFlag()) {
cycle = (short) (read.getReadLength() * readOrderFactor);
increment = (short) (-1 * readOrderFactor);
}
else {
init = (short) read.getReadLength();
increment = -1;
cycle = readOrderFactor;
increment = readOrderFactor;
}
short cycle = init;
for (int i = 0; i < read.getReadLength(); i++) {
cycles[i] = BitSetUtils.bitSetFrom(cycle);
cycle += increment;

View File

@ -143,9 +143,18 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
public byte DELETIONS_DEFAULT_QUALITY = 45;
/**
* Reads with low-quality bases on either tail (beginning or end) will not be considered in the context. This parameter defines the quality at or below which a tail is considered low quality.
*/
@Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false)
public byte LOW_QUAL_TAIL = 2;
/**
* BQSR generates a quantization table for quick quantization later by subsequent tools. BQSR does not quantize the base qualities, this is done by the engine with the -qq or -BQSR options.
* This parameter tells BQSR the number of levels of quantization to use to build the quantization table.
*/
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
public int QUANTIZING_LEVELS = 16;
@Hidden
@ -155,8 +164,11 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Hidden
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
public int QUANTIZING_LEVELS = 16;
@Argument(fullName = "keep_intermediate_files", shortName = "k", required = false, doc ="does not remove the temporary csv file created to generate the plots")
public boolean KEEP_INTERMEDIATE_FILES = false;
@Hidden
@Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering")
public boolean NO_PLOTS = false;
public GATKReportTable generateReportTable() {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run");
@ -176,6 +188,8 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
return argumentsTable;
}

View File

@ -279,6 +279,12 @@ public class RecalibrationReport {
else if (primaryKey.equals("quantizing_levels"))
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
else if (primaryKey.equals("keep_intermediate_files"))
RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value);
else if (primaryKey.equals("no_plots"))
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
}
return RAC;

View File

@ -26,8 +26,9 @@ public class CycleCovariateUnitTest {
@Test(enabled = true)
public void testSimpleCycles() {
short readLength = 10;
short readLength = 10;
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
read.setReadPairedFlag(true);
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
read.getReadGroup().setPlatform("illumina");
@ -38,6 +39,13 @@ public class CycleCovariateUnitTest {
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), readLength, (short) -1);
read.setSecondOfPairFlag(true);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -readLength, (short) 1);
read.setReadNegativeStrandFlag(false);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1);
}
private void verifyCovariateArray(BitSet[] values, short init, short increment) {