Removed plot generation from the BaseRecalibrator tool

Improved AnalyzeCovariates (AC) integration test.
Renamed AC test files ending with .grp to .table

Implementation:

* Removed RECAL_PDF/CSV_FILE from RecalibrationArgumentCollection (RAC). Updated rest of the code accordingly.
* Fixed BQSRIntegrationTest to work with new changes
This commit is contained in:
Valentin Ruano-Rubio 2013-06-19 11:44:18 -04:00
parent 08f92bb6f9
commit 1f8282633b
10 changed files with 57 additions and 107 deletions

View File

@ -432,7 +432,7 @@ public final class AnalyzeCovariates extends RodWalker<AnalyzeCovariates.None,An
final Map<String,? extends CharSequence> diffs = exampleEntry.getValue().getRAC().compareReportArguments(
reportEntries[i].getValue().getRAC(),exampleEntry.getKey(),reportEntries[i].getKey());
if (diffs.size() != 0) {
throw new UserException("There are differences in relevant arguments of"
throw new UserException.IncompatibleRecalibrationTableParameters("There are differences in relevant arguments of"
+ " two or more input recalibration reports. Please make sure"
+ " they have been created using the same recalibration parameters."
+ " " + Utils.join("// ", reportDifferencesStringArray(diffs)));

View File

@ -92,18 +92,6 @@ public class BQSRGatherer extends Gatherer {
generalReport.calculateQuantizedQualities();
RecalibrationArgumentCollection RAC = generalReport.getRAC();
if ( RAC.RECAL_PDF_FILE != null ) {
RAC.RECAL_TABLE_FILE = output;
if ( RAC.existingRecalibrationReport != null ) {
final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport);
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates());
}
else {
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates());
}
}
generalReport.output(outputFile);
}
}

View File

@ -180,11 +180,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
public void initialize() {
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
if (RAC.RECAL_PDF_FILE != null) {
Utils.warnUser("This is not the recommended way to generate recalibration plots any longer and will be"
+ " discontinued soon in future releases. Please use the 'AnalyzeCovariates' tool instead from now one");
}
if (RAC.FORCE_PLATFORM != null)
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
@ -522,11 +517,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
generateReport();
logger.info("...done!");
if ( RAC.RECAL_PDF_FILE != null ) {
logger.info("Generating recalibration plots...");
generatePlots();
}
logger.info("BaseRecalibrator was able to recalibrate " + result + " reads");
}
@ -534,16 +524,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
return recalibrationEngine.getFinalRecalibrationTables();
}
/**
 * Triggers recalibration plot generation through {@link RecalUtils}.
 *
 * When the engine argument {@code BQSR_RECAL_FILE} is set, the report loaded from that file supplies
 * the "original" tables and the tables from {@link #getRecalibrationTable()} are passed as the
 * "recalibrated" ones. Otherwise only the tables from {@link #getRecalibrationTable()} are plotted.
 */
private void generatePlots() {
    final File existingRecalFile = getToolkit().getArguments().BQSR_RECAL_FILE;

    // No previous recalibration file: plot the current tables on their own.
    if (existingRecalFile == null) {
        RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates);
        return;
    }

    // A previous recalibration file exists: plot it against the current tables.
    final RecalibrationReport existingReport = new RecalibrationReport(existingRecalFile);
    RecalUtils.generateRecalibrationPlot(RAC, existingReport.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates);
}
/**
* go through the quality score table and use the # observations and the empirical quality score
* to build a quality score histogram for quantization. Then use the QuantizeQual algorithm to

View File

@ -89,21 +89,6 @@ public class RecalibrationArgumentCollection implements Cloneable {
public File RECAL_TABLE_FILE = null;
public PrintStream RECAL_TABLE;
/**
* If not provided, then no plots will be generated (useful for queue scatter/gathering).
* However, we *highly* recommend that users generate these plots whenever possible for QC checking.
*/
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false, defaultToStdout = false)
public File RECAL_PDF_FILE = null;
/**
* If not provided, then a temporary file is created and then deleted upon completion.
* For advanced users only.
*/
@Advanced
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
public File RECAL_CSV_FILE = null;
/**
* Note that the --list argument requires a fully resolved and correct command-line to work.
*/
@ -284,8 +269,6 @@ public class RecalibrationArgumentCollection implements Cloneable {
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
argumentsTable.addRowID("plot_pdf_file", true);
argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;

View File

@ -550,36 +550,48 @@ public class RecalUtils {
executor.exec();
}
private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) {
private static void outputRecalibrationPlot(final File csvFile, final RecalibrationArgumentCollection RAC) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath());
executor.addArgs(csvFile.getAbsolutePath());
executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath());
executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath());
executor.exec();
}
/**
 * Convenience overload for callers that have a single set of tables and nothing to compare them with.
 * It forwards to the four-argument overload, passing {@code null} as the recalibrated tables.
 *
 * Please use {@link #generateCsv(java.io.File, java.util.Map)} and {@link #generatePlots(java.io.File, java.io.File, java.io.File)} instead.
 *
 * @param RAC argument collection forwarded unchanged.
 * @param original the only tables to plot.
 * @param requestedCovariates covariates forwarded unchanged.
 *
 * @deprecated
 */
@Deprecated
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final Covariate[] requestedCovariates) {
    // Typed local keeps the "no recalibrated tables" intent explicit at the call site.
    final RecalibrationTables noRecalibratedTables = null;
    generateRecalibrationPlot(RAC, original, noRecalibratedTables, requestedCovariates);
}
/**
 * Writes the given recalibration tables to a temporary CSV file and hands that file to the
 * plotting step ({@code outputRecalibrationPlot}).
 *
 * Please use {@link #generateCsv(java.io.File, java.util.Map)} and {@link #generatePlots(java.io.File, java.io.File, java.io.File)} instead.
 *
 * @param RAC argument collection passed on to the plotting step.
 * @param original tables written under the "ORIGINAL" label; always written.
 * @param recalibrated tables written under the "RECALIBRATED" label; skipped when {@code null}.
 * @param requestedCovariates covariates passed through to the CSV writer.
 *
 * @throws UserException if the temporary CSV file cannot be created or opened.
 *
 * @deprecated
 */
@Deprecated
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
    final File csvTempFile;
    final PrintStream csvStream;
    try {
        // The File handle must outlive this try block: it is needed by the plotting step and for clean-up.
        csvTempFile = File.createTempFile("BQSR", ".csv");
        csvTempFile.deleteOnExit(); // safety net in case the explicit delete below is never reached
        csvStream = new PrintStream(csvTempFile);
    } catch (IOException e) {
        throw new UserException("Could not create temporary csv file", e);
    }

    try {
        // The header is printed by whichever section is written first.
        if ( recalibrated != null )
            writeCSV(csvStream, recalibrated, "RECALIBRATED", requestedCovariates, true);
        writeCSV(csvStream, original, "ORIGINAL", requestedCovariates, recalibrated == null);
    } finally {
        // Close (and thereby flush) before the plotting step reads the file, even if writing failed.
        csvStream.close();
    }

    try {
        outputRecalibrationPlot(csvTempFile, RAC);
    } finally {
        // Best-effort removal; deleteOnExit() covers the case where this fails.
        csvTempFile.delete();
    }
}
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {

View File

@ -340,9 +340,6 @@ public class RecalibrationReport {
else if (argument.equals("recalibration_report"))
RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
else if (argument.equals("plot_pdf_file"))
RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value);
else if (argument.equals("binary_tag_name"))
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;

View File

@ -77,18 +77,18 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest {
/**
* File containing the before report for normal testing.
*/
private static final File BEFORE_FILE = new File(TEST_DATA_DIR,"before.grp");
private static final File BEFORE_FILE = new File(TEST_DATA_DIR,"before.table");
/**
* File containing the after report for normal testing.
*/
private static final File AFTER_FILE = new File(TEST_DATA_DIR,"after.grp");
private static final File AFTER_FILE = new File(TEST_DATA_DIR,"after.table");
/**
* File containing the bqsr report for normal testing.
*/
private static final File BQSR_FILE = new File(TEST_DATA_DIR,"bqsr.grp");
private static final File BQSR_FILE = new File(TEST_DATA_DIR,"bqsr.table");
/**
* Test the content of the generated csv file.
@ -150,7 +150,7 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest {
final File afterFile = new File(TEST_DATA_DIR,afterFileName);
final WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(null,"%s",true,true,afterFile),
1,UserException.class);
1,UserException.IncompatibleRecalibrationTableParameters.class);
executeTest("testParameterChangeException - " + description, spec);
}
@ -237,10 +237,10 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest {
* Triplets &lt; after-grp-file, whether it should fail, what is different &gt;
*/
private final Object[][] DIFFERENT_PARAMETERS_AFTER_FILES = {
{"after-cov.grp", true, "Adds additional covaraite: repeat-length"},
{"after-dpSOLID.grp", true, "Change the default platform to SOLID"},
{"after-noDp.grp",true, "Unset the default platform"},
{"after-mcs4grp", true, "Changed -mcs parameter from 2 to 4"}
{"after-cov.table", true, "Adds additional covariate: repeat-length" },
{"after-dpSOLID.table", true, "Change the default platform to SOLID" },
{"after-noDp.table",true, "Unset the default platform" },
{"after-mcs4.table", true, "Changed -mcs parameter from 2 to 4" }
};
/**

View File

@ -100,23 +100,23 @@ public class BQSRIntegrationTest extends WalkerTest {
@DataProvider(name = "BQSRTest")
public Object[][] createBQSRTestData() {
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "61fd466b5e94d2d67e116f6f67c9f939")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "e08b5bcdb64f4beea03730e5631a14ca")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "448a45dc154c95d1387cb5cdddb67071")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "c1e7999e445d51bbe2e775dac5325643")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a57c16918cdfe12d55a89c21bf195279")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "836dccacf48ccda6b2843d07e8f1ef4d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "0fb2aedc2f8d66b5821cb570f15a8c4d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c9953f020a65c1603a6d71aeeb1b95f3")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "85a120b7d86b61597b86b9e93decbdfc")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "5248dc49aec0323c74b496bb4928c73c")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "cb52f267e0010f849f50b0bf1de474a1")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "fb372d0a8fc41b01ced1adab31546850")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "c1c3cda8caceed619d3d439c3990cd26")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c9953f020a65c1603a6d71aeeb1b95f3")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5bfff0c699345cca12a9b33acf95588f")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "f805a0020eea987b79f314fa99913806")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "86075d3856eb06816a0dd81af55e421f")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "155802237e1fc7a001398b8f4bcf4b72")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "38c7916cc019fe8d134df67639422b42")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "b74e75f3c5aa90bd21af1e20f2ac8c40")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "e564505aea11464de8ed72890d9ea89a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "380d8be121ffaddd3461ee0ac3d1a76f")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "0b5a8e259e997e4c7b5836d4c28e6f4d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "281682124584ab384f23359934df0c3b")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0a92fdff5fd26227c29d34eda5a32f49")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "90d8c24077e8ae9a0037a9aad5f09e31")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "c41ef02c640ef1fed4bfc03b9b33b616")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "b577cd1d529425f66db49620db09fdca")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "0b5a8e259e997e4c7b5836d4c28e6f4d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "9ad49269c0156f8ab1173261bf23e600")},
// make sure we work with ION torrent bam
{new BQSRTest(b37KGReference, privateTestDir + "iontorrent.bam", "20:10,000,000-10,200,000", "", "7375c7b692e76b651c278a9fb478fa1c")},
{new BQSRTest(b37KGReference, privateTestDir + "iontorrent.bam", "20:10,000,000-10,200,000", "", "04bfa4760767022e7f5252e6e4432cc1")},
};
}
@ -141,22 +141,6 @@ public class BQSRIntegrationTest extends WalkerTest {
executeTest("testBQSRFailWithoutDBSNP", spec);
}
@Test
public void testBQSRCSV() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
" -T BaseRecalibrator" +
" -R " + b36KGReference +
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
" -knownSites " + b36dbSNP129 +
" -L 1:10,000,000-10,200,000" +
" -o /dev/null" +
" -sortAllCols" +
" --plot_pdf_file /dev/null" +
" --intermediate_csv_file %s",
Arrays.asList("90ad19143024684e3c4410dc8fd2bd9d"));
executeTest("testBQSR-CSVfile", spec);
}
@Test
public void testBQSRFailWithSolidNoCall() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(

View File

@ -85,7 +85,7 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) +
xlab(paste(cov,"Covariate")) +
ylab("Mean Quality Score") + ylim(0,max(42,d$AverageReportedQuality));
ylab("Mean Quality Score") + ylim(0,max(42,d$AverageReportedQuality)) +
blankTheme
e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) +
opts(axis.text.x=theme_text(angle=90, hjust=0))

View File

@ -471,4 +471,10 @@ public class UserException extends ReviewedStingException {
super(message,innerException);
}
}
/**
 * User-facing error for recalibration tables/reports whose relevant parameters are not
 * compatible with each other.
 */
public static class IncompatibleRecalibrationTableParameters extends UserException {

    /**
     * Creates the exception with the given explanation.
     *
     * @param s message describing the incompatibility; passed unchanged to {@link UserException}.
     */
    public IncompatibleRecalibrationTableParameters(final String s) {
        super(s);
    }
}
}