Track all outputs from BQSR (.table, .csv., and .pdf) as @Output arguments. Updated integration tests because we no longer have command-line options not to generate plots (now just don't provide a pdf) or to keep the intermediate csv (now, just provide a filename on the command-line). This is currently busted because we can't access the original filenames from the Engine's storage/stub system and therefore cannot call out to the Rscript with the executor (which requires filename strings).

This commit is contained in:
Eric Banks 2012-09-12 11:24:53 -04:00
parent bfbf1686cd
commit 994a4ff387
6 changed files with 67 additions and 92 deletions

View File

@ -34,7 +34,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -I " + bam +
" -L " + interval +
args +
" --no_plots" +
" -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
" -o %s";
}
@ -50,21 +49,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "5a28b9fb5f2e36703e9804d276c38009")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "646a7c6db12cf0ec119bc27abed9c7b8")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "777f21676435837ba470497e17624266")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f7d77e0d86d033c69f25ef9858fdb95d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "c3866646833cbb60831695d016d614d1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "04c1d020bdb25fc55c3983748702290c")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "edf77f41cdd6c27f987cb1ecbcaa889b")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3d52db844e8220d2dbdcd1339b3d3000")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "47605edafb4da0859bf735a6bd2dfe9c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0ac92d3548fdca8f253121842bb38c65")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "de7448f5bf787c17f1ee4c415bc90d3c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "60542fe8a3cc89a47421767c6e1c11cd")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "f9a5a8f1b8f77f4c8857ccba8bff49a6")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3d52db844e8220d2dbdcd1339b3d3000")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "919d88b173b0c11cbca762132bc94ab9")},
};
}
@ -88,7 +87,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -R " + b36KGReference +
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
" -L 1:10,000,000-10,200,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.CommandLineException.class);
@ -102,7 +100,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -R " + b36KGReference +
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
" -L 1:50,000-80,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.class);

View File

@ -49,7 +49,6 @@ public class BQSRGatherer extends Gatherer {
@Override
public void gather(List<File> inputs, File output) {
RecalibrationReport generalReport = null;
final PrintStream outputFile;
try {
outputFile = new PrintStream(output);
@ -57,6 +56,7 @@ public class BQSRGatherer extends Gatherer {
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
}
RecalibrationReport generalReport = null;
for (File input : inputs) {
final RecalibrationReport inputReport = new RecalibrationReport(input);
if (generalReport == null)
@ -70,14 +70,12 @@ public class BQSRGatherer extends Gatherer {
generalReport.calculateQuantizedQualities();
RecalibrationArgumentCollection RAC = generalReport.getRAC();
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original");
if (RAC.recalibrationReport != null && RAC.RECAL_PDF != null) {
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates());
}
else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal");
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
else if (RAC.RECAL_PDF != null) {
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates());
}
generalReport.output(outputFile);

View File

@ -50,8 +50,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
@ -110,6 +108,7 @@ import java.util.ArrayList;
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
@ -284,7 +283,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
generateReport();
logger.info("...done!");
if (!RAC.NO_PLOTS) {
if (RAC.RECAL_PDF != null) {
logger.info("Generating recalibration plots...");
generatePlots();
}
@ -296,10 +295,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
}
else
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
}
@ -313,14 +312,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
}
private void generateReport() {
PrintStream output;
try {
output = new PrintStream(RAC.RECAL_FILE);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
}
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates);
}
}

View File

@ -28,10 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File;
import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
@ -62,8 +62,22 @@ public class RecalibrationArgumentCollection {
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*/
@Gather(BQSRGatherer.class)
@Output
public File RECAL_FILE;
@Output(doc = "The output recalibration table file to create", required = true)
public PrintStream RECAL_TABLE;
/**
* If not provided, then no plots will be generated (useful for queue scatter/gathering).
* However, we *highly* recommend that users generate these plots whenever possible for QC checking.
*/
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false)
public PrintStream RECAL_PDF = null;
/**
* If not provided, then a temporary file is created and then deleted upon completion.
*/
@Hidden
@Output(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
public PrintStream RECAL_CSV = null;
/**
* List all implemented covariates.
@ -166,12 +180,6 @@ public class RecalibrationArgumentCollection {
@Hidden
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Hidden
@Argument(fullName = "keep_intermediate_files", shortName = "k", required = false, doc ="does not remove the temporary csv file created to generate the plots")
public boolean KEEP_INTERMEDIATE_FILES = false;
@Hidden
@Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering")
public boolean NO_PLOTS = false;
public File recalibrationReport = null;
@ -205,10 +213,6 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);

View File

@ -47,7 +47,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
@ -333,8 +333,8 @@ public class RecalUtils {
return covariate.getClass().getSimpleName().split("Covariate")[0];
}
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), RAC.RECAL_TABLE);
}
/**
@ -362,46 +362,36 @@ public class RecalUtils {
report.print(outputFile);
}
private static Pair<PrintStream, File> initializeRecalibrationPlot(File filename) {
final PrintStream deltaTableStream;
final File deltaTableFileName = new File(filename + ".csv");
try {
deltaTableStream = new PrintStream(deltaTableFileName);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(deltaTableFileName, "File " + deltaTableFileName + " could not be created");
}
return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
}
private static void outputRecalibrationPlot(final File gatkReportFilename, Pair<PrintStream, File> files, boolean keepIntermediates) {
final File csvFileName = files.getSecond();
final File plotFileName = new File(csvFileName + ".pdf");
files.getFirst().close();
private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
executor.addArgs(csvFileName.getAbsolutePath());
executor.addArgs(gatkReportFilename.getAbsolutePath());
executor.addArgs(plotFileName.getAbsolutePath());
//executor.addArgs(RAC.RECAL_CSV.getAbsolutePath());
//executor.addArgs(RAC.RECAL_TABLE.getAbsolutePath());
//executor.addArgs(RAC.RECAL_PDF.getAbsolutePath());
executor.exec();
if (!keepIntermediates)
if (!csvFileName.delete())
throw new ReviewedStingException("Could not find file " + csvFileName.getAbsolutePath());
}
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
outputRecalibrationPlot(filename, files, keepIntermediates);
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final Covariate[] requestedCovariates) {
generateRecalibrationPlot(RAC, original, null, requestedCovariates);
}
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
outputRecalibrationPlot(filename, files, keepIntermediates);
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
File temporaryFile = null;
if ( RAC.RECAL_CSV == null ) {
try {
temporaryFile = File.createTempFile("BQSR", ".csv");
temporaryFile.deleteOnExit();
RAC.RECAL_CSV = new PrintStream(temporaryFile);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(temporaryFile, "Temporary csv file " + temporaryFile + " could not be created because " + e.getMessage());
}
}
if ( recalibrated != null )
writeCSV(RAC.RECAL_CSV, recalibrated, "RECALIBRATED", requestedCovariates, true);
writeCSV(RAC.RECAL_CSV, original, "ORIGINAL", requestedCovariates, recalibrated == null);
outputRecalibrationPlot(RAC);
}
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {

View File

@ -284,12 +284,6 @@ public class RecalibrationReport {
else if (argument.equals("quantizing_levels"))
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
else if (argument.equals("keep_intermediate_files"))
RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value);
else if (argument.equals("no_plots"))
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
else if (argument.equals("recalibration_report"))
RAC.recalibrationReport = (value == null) ? null : new File((String) value);