Removed plot generation from the BaseRecalibrator software
Improved AnalyzeCovariates (AC) integration test. Renamed AC test files ending with .grp to .table. Implementation: * Removed RECAL_PDF/CSV_FILE from RecalibrationArgumentCollection (RAC). Updated the rest of the code accordingly. * Fixed BQSRIntegrationTest to work with the new changes.
This commit is contained in:
parent
08f92bb6f9
commit
1f8282633b
|
|
@ -432,7 +432,7 @@ public final class AnalyzeCovariates extends RodWalker<AnalyzeCovariates.None,An
|
|||
final Map<String,? extends CharSequence> diffs = exampleEntry.getValue().getRAC().compareReportArguments(
|
||||
reportEntries[i].getValue().getRAC(),exampleEntry.getKey(),reportEntries[i].getKey());
|
||||
if (diffs.size() != 0) {
|
||||
throw new UserException("There are differences in relevant arguments of"
|
||||
throw new UserException.IncompatibleRecalibrationTableParameters("There are differences in relevant arguments of"
|
||||
+ " two or more input recalibration reports. Please make sure"
|
||||
+ " they have been created using the same recalibration parameters."
|
||||
+ " " + Utils.join("// ", reportDifferencesStringArray(diffs)));
|
||||
|
|
|
|||
|
|
@ -92,18 +92,6 @@ public class BQSRGatherer extends Gatherer {
|
|||
|
||||
generalReport.calculateQuantizedQualities();
|
||||
|
||||
RecalibrationArgumentCollection RAC = generalReport.getRAC();
|
||||
if ( RAC.RECAL_PDF_FILE != null ) {
|
||||
RAC.RECAL_TABLE_FILE = output;
|
||||
if ( RAC.existingRecalibrationReport != null ) {
|
||||
final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates());
|
||||
}
|
||||
else {
|
||||
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates());
|
||||
}
|
||||
}
|
||||
|
||||
generalReport.output(outputFile);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -180,11 +180,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
public void initialize() {
|
||||
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
|
||||
|
||||
if (RAC.RECAL_PDF_FILE != null) {
|
||||
Utils.warnUser("This is not the recommended way to generate recalibration plots any longer and will be"
|
||||
+ " discontinued soon in future releases. Please use the 'AnalyzeCovariates' tool instead from now one");
|
||||
}
|
||||
|
||||
if (RAC.FORCE_PLATFORM != null)
|
||||
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
||||
|
||||
|
|
@ -522,11 +517,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
if ( RAC.RECAL_PDF_FILE != null ) {
|
||||
logger.info("Generating recalibration plots...");
|
||||
generatePlots();
|
||||
}
|
||||
|
||||
logger.info("BaseRecalibrator was able to recalibrate " + result + " reads");
|
||||
}
|
||||
|
||||
|
|
@ -534,16 +524,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
return recalibrationEngine.getFinalRecalibrationTables();
|
||||
}
|
||||
|
||||
private void generatePlots() {
|
||||
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
|
||||
if (recalFile != null) {
|
||||
RecalibrationReport report = new RecalibrationReport(recalFile);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates);
|
||||
}
|
||||
else
|
||||
RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates);
|
||||
}
|
||||
|
||||
/**
|
||||
* go through the quality score table and use the # observations and the empirical quality score
|
||||
* to build a quality score histogram for quantization. Then use the QuantizeQual algorithm to
|
||||
|
|
|
|||
|
|
@ -89,21 +89,6 @@ public class RecalibrationArgumentCollection implements Cloneable {
|
|||
public File RECAL_TABLE_FILE = null;
|
||||
public PrintStream RECAL_TABLE;
|
||||
|
||||
/**
|
||||
* If not provided, then no plots will be generated (useful for queue scatter/gathering).
|
||||
* However, we *highly* recommend that users generate these plots whenever possible for QC checking.
|
||||
*/
|
||||
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false, defaultToStdout = false)
|
||||
public File RECAL_PDF_FILE = null;
|
||||
|
||||
/**
|
||||
* If not provided, then a temporary file is created and then deleted upon completion.
|
||||
* For advanced users only.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
|
||||
public File RECAL_CSV_FILE = null;
|
||||
|
||||
/**
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work.
|
||||
*/
|
||||
|
|
@ -284,8 +269,6 @@ public class RecalibrationArgumentCollection implements Cloneable {
|
|||
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("plot_pdf_file", true);
|
||||
argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
|
|
|
|||
|
|
@ -550,36 +550,48 @@ public class RecalUtils {
|
|||
executor.exec();
|
||||
}
|
||||
|
||||
private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) {
|
||||
private static void outputRecalibrationPlot(final File csvFile, final RecalibrationArgumentCollection RAC) {
|
||||
|
||||
final RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
|
||||
executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath());
|
||||
executor.addArgs(csvFile.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath());
|
||||
executor.exec();
|
||||
}
|
||||
|
||||
/**
|
||||
* Please use {@link #generateCsv(java.io.File, java.util.Map)} and {@link #generatePlots(java.io.File, java.io.File, java.io.File)} instead.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
@Deprecated
|
||||
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final Covariate[] requestedCovariates) {
|
||||
generateRecalibrationPlot(RAC, original, null, requestedCovariates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Please use {@link #generateCsv(java.io.File, java.util.Map)} and {@link #generatePlots(java.io.File, java.io.File, java.io.File)} instead.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
@Deprecated
|
||||
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
|
||||
final PrintStream csvFile;
|
||||
final PrintStream csvStream;
|
||||
final File csvTempFile = null;
|
||||
try {
|
||||
if ( RAC.RECAL_CSV_FILE == null ) {
|
||||
RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv");
|
||||
RAC.RECAL_CSV_FILE.deleteOnExit();
|
||||
}
|
||||
csvFile = new PrintStream(RAC.RECAL_CSV_FILE);
|
||||
File csvTmpFile = File.createTempFile("BQSR",".csv");
|
||||
csvTmpFile.deleteOnExit();
|
||||
csvStream = new PrintStream(csvTmpFile);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e);
|
||||
throw new UserException("Could not create temporary csv file", e);
|
||||
}
|
||||
|
||||
if ( recalibrated != null )
|
||||
writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||
writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null);
|
||||
outputRecalibrationPlot(RAC);
|
||||
writeCSV(csvStream, recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||
writeCSV(csvStream, original, "ORIGINAL", requestedCovariates, recalibrated == null);
|
||||
csvStream.close();
|
||||
outputRecalibrationPlot(csvTempFile, RAC);
|
||||
csvTempFile.delete();
|
||||
}
|
||||
|
||||
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
||||
|
|
|
|||
|
|
@ -340,9 +340,6 @@ public class RecalibrationReport {
|
|||
else if (argument.equals("recalibration_report"))
|
||||
RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("plot_pdf_file"))
|
||||
RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("binary_tag_name"))
|
||||
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;
|
||||
|
||||
|
|
|
|||
|
|
@ -77,18 +77,18 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest {
|
|||
/**
|
||||
* File containing the before report for normal testing.
|
||||
*/
|
||||
private static final File BEFORE_FILE = new File(TEST_DATA_DIR,"before.grp");
|
||||
private static final File BEFORE_FILE = new File(TEST_DATA_DIR,"before.table");
|
||||
|
||||
/**
|
||||
* File containing the after report for normal testing.
|
||||
*/
|
||||
private static final File AFTER_FILE = new File(TEST_DATA_DIR,"after.grp");
|
||||
private static final File AFTER_FILE = new File(TEST_DATA_DIR,"after.table");
|
||||
|
||||
|
||||
/**
|
||||
* File containing the bqsr report for normal testing.
|
||||
*/
|
||||
private static final File BQSR_FILE = new File(TEST_DATA_DIR,"bqsr.grp");
|
||||
private static final File BQSR_FILE = new File(TEST_DATA_DIR,"bqsr.table");
|
||||
|
||||
/**
|
||||
* Test the content of the generated csv file.
|
||||
|
|
@ -150,7 +150,7 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest {
|
|||
final File afterFile = new File(TEST_DATA_DIR,afterFileName);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(null,"%s",true,true,afterFile),
|
||||
1,UserException.class);
|
||||
1,UserException.IncompatibleRecalibrationTableParameters.class);
|
||||
executeTest("testParameterChangeException - " + description, spec);
|
||||
}
|
||||
|
||||
|
|
@ -237,10 +237,10 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest {
|
|||
* Triplets < after-table-file, whether it should fail, what is different >
|
||||
*/
|
||||
private final Object[][] DIFFERENT_PARAMETERS_AFTER_FILES = {
|
||||
{"after-cov.grp", true, "Adds additional covaraite: repeat-length"},
|
||||
{"after-dpSOLID.grp", true, "Change the default platform to SOLID"},
|
||||
{"after-noDp.grp",true, "Unset the default platform"},
|
||||
{"after-mcs4grp", true, "Changed -mcs parameter from 2 to 4"}
|
||||
{"after-cov.table", true, "Adds additional covariate: repeat-length" },
|
||||
{"after-dpSOLID.table", true, "Change the default platform to SOLID" },
|
||||
{"after-noDp.table",true, "Unset the default platform" },
|
||||
{"after-mcs4.table", true, "Changed -mcs parameter from 2 to 4" }
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -100,23 +100,23 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
@DataProvider(name = "BQSRTest")
|
||||
public Object[][] createBQSRTestData() {
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "61fd466b5e94d2d67e116f6f67c9f939")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "e08b5bcdb64f4beea03730e5631a14ca")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "448a45dc154c95d1387cb5cdddb67071")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "c1e7999e445d51bbe2e775dac5325643")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a57c16918cdfe12d55a89c21bf195279")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "836dccacf48ccda6b2843d07e8f1ef4d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "0fb2aedc2f8d66b5821cb570f15a8c4d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c9953f020a65c1603a6d71aeeb1b95f3")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "85a120b7d86b61597b86b9e93decbdfc")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "5248dc49aec0323c74b496bb4928c73c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "cb52f267e0010f849f50b0bf1de474a1")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "fb372d0a8fc41b01ced1adab31546850")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "c1c3cda8caceed619d3d439c3990cd26")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c9953f020a65c1603a6d71aeeb1b95f3")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5bfff0c699345cca12a9b33acf95588f")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "f805a0020eea987b79f314fa99913806")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "86075d3856eb06816a0dd81af55e421f")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "155802237e1fc7a001398b8f4bcf4b72")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "38c7916cc019fe8d134df67639422b42")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "b74e75f3c5aa90bd21af1e20f2ac8c40")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "e564505aea11464de8ed72890d9ea89a")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "380d8be121ffaddd3461ee0ac3d1a76f")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "0b5a8e259e997e4c7b5836d4c28e6f4d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "281682124584ab384f23359934df0c3b")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0a92fdff5fd26227c29d34eda5a32f49")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "90d8c24077e8ae9a0037a9aad5f09e31")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "c41ef02c640ef1fed4bfc03b9b33b616")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "b577cd1d529425f66db49620db09fdca")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "0b5a8e259e997e4c7b5836d4c28e6f4d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "9ad49269c0156f8ab1173261bf23e600")},
|
||||
// make sure we work with ION torrent bam
|
||||
{new BQSRTest(b37KGReference, privateTestDir + "iontorrent.bam", "20:10,000,000-10,200,000", "", "7375c7b692e76b651c278a9fb478fa1c")},
|
||||
{new BQSRTest(b37KGReference, privateTestDir + "iontorrent.bam", "20:10,000,000-10,200,000", "", "04bfa4760767022e7f5252e6e4432cc1")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -141,22 +141,6 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
executeTest("testBQSRFailWithoutDBSNP", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBQSRCSV() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
" -T BaseRecalibrator" +
|
||||
" -R " + b36KGReference +
|
||||
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -o /dev/null" +
|
||||
" -sortAllCols" +
|
||||
" --plot_pdf_file /dev/null" +
|
||||
" --intermediate_csv_file %s",
|
||||
Arrays.asList("90ad19143024684e3c4410dc8fd2bd9d"));
|
||||
executeTest("testBQSR-CSVfile", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBQSRFailWithSolidNoCall() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
|
|||
|
||||
p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) +
|
||||
xlab(paste(cov,"Covariate")) +
|
||||
ylab("Mean Quality Score") + ylim(0,max(42,d$AverageReportedQuality));
|
||||
ylab("Mean Quality Score") + ylim(0,max(42,d$AverageReportedQuality)) +
|
||||
blankTheme
|
||||
e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) +
|
||||
opts(axis.text.x=theme_text(angle=90, hjust=0))
|
||||
|
|
|
|||
|
|
@ -471,4 +471,10 @@ public class UserException extends ReviewedStingException {
|
|||
super(message,innerException);
|
||||
}
|
||||
}
|
||||
|
||||
public static class IncompatibleRecalibrationTableParameters extends UserException {
|
||||
public IncompatibleRecalibrationTableParameters(String s) {
|
||||
super(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue