From d94d0d15c2e92568df7e20b6c3caf87aa20ff815 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 12 Sep 2012 15:15:40 -0400 Subject: [PATCH] Complete overhaul of previous commits to make it all work with scatter-gather. Now tracks output files correctly and can print to stdout. --- .../walkers/bqsr/BQSRIntegrationTest.java | 30 +++++++++---------- .../sting/gatk/GenomeAnalysisEngine.java | 15 ---------- .../sting/gatk/walkers/bqsr/BQSRGatherer.java | 15 ++++++---- .../gatk/walkers/bqsr/BaseRecalibrator.java | 12 ++++++-- .../bqsr/RecalibrationArgumentCollection.java | 15 ++++++---- .../sting/utils/recalibration/RecalUtils.java | 26 ++++++++-------- .../recalibration/RecalibrationReport.java | 5 +++- 7 files changed, 60 insertions(+), 58 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 58ce7ffef..b0e9ef4fe 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -49,21 +49,21 @@ public class BQSRIntegrationTest extends WalkerTest { String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqInterval = "chr1:10,000,000-10,100,000"; return new Object[][]{ - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "5a28b9fb5f2e36703e9804d276c38009")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "646a7c6db12cf0ec119bc27abed9c7b8")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "777f21676435837ba470497e17624266")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f7d77e0d86d033c69f25ef9858fdb95d")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "c3866646833cbb60831695d016d614d1")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "04c1d020bdb25fc55c3983748702290c")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "edf77f41cdd6c27f987cb1ecbcaa889b")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3d52db844e8220d2dbdcd1339b3d3000")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "47605edafb4da0859bf735a6bd2dfe9c")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0ac92d3548fdca8f253121842bb38c65")}, - {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "de7448f5bf787c17f1ee4c415bc90d3c")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "60542fe8a3cc89a47421767c6e1c11cd")}, - {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "f9a5a8f1b8f77f4c8857ccba8bff49a6")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3d52db844e8220d2dbdcd1339b3d3000")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "919d88b173b0c11cbca762132bc94ab9")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "be6c7bc0b79a2d0395d21cd0154540d5")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "65781095beb41d8feca26e93e04dcc0b")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8ee1fed1713daca1f36e8b30bee2cd23")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "9449d8a8baac742f46673e9b8314220b")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "39313c6e3b85142548fee9b6c130e7b6")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "15eae9e834ed80b24660393c6df87f85")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "8485d8fd5e780e98d720dfbf79f26528")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c423d1d443822dae404239bb9a746b96")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "fb0a6aef430f562ed5e0002d03e0c619")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "efee7bcb89abe36da1cfd8a635d37cd2")}, + {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "0e8a3238902a1ff0f0c657fb09b4c022")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5e58d3dcf5ca38f008a64d1c0743ed83")}, + {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "1a8e5c85c7935eb1bd2203f5c86ce1db")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c423d1d443822dae404239bb9a746b96")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "6762b39dc027056365280a9d582a6713")}, }; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index bc37b0557..fc2546173 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -732,21 +732,6 @@ public class GenomeAnalysisEngine { outputs.add(stub); } - /** - * Iterates over all registered output stubs and tries to find the one representing the given OutputStream. - * - * @param output the stream to check for - * @return the file associated with the given stream/stub if available, null otherwise - */ - public File getFilenameFromAssociatedOutputStream(final OutputStream output) { - for ( final Stub stub : outputs ) { - if ( stub.getOutputStream() == output ) - return stub.getOutputFile(); - } - - return null; - } - /** * Returns the tag associated with a given command-line argument. * @param key Object for which to inspect the tag. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java index 128b3f809..dbb628135 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -70,12 +70,15 @@ public class BQSRGatherer extends Gatherer { generalReport.calculateQuantizedQualities(); RecalibrationArgumentCollection RAC = generalReport.getRAC(); - if (RAC.recalibrationReport != null && RAC.RECAL_PDF != null) { - final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport); - RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates()); - } - else if (RAC.RECAL_PDF != null) { - RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates()); + if ( RAC.RECAL_PDF_FILE != null ) { + RAC.RECAL_TABLE_FILE = output; + if ( RAC.existingRecalibrationReport != null ) { + final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport); + RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates()); + } + else { + RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates()); + } } generalReport.output(outputFile); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 04ebeed55..e78b9b6fc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -50,6 +50,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; +import java.io.IOException; +import java.io.PrintStream; import java.lang.reflect.Constructor; import java.util.ArrayList; @@ -149,7 +151,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed RecalUtils.listAvailableCovariates(logger); System.exit(0); } - RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table + RAC.existingRecalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); @@ -168,6 +170,12 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection } + try { + RAC.RECAL_TABLE = new PrintStream(RAC.RECAL_TABLE_FILE); + } catch (IOException e) { + throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e); + } + int numReadGroups = 0; for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() ) numReadGroups += header.getReadGroups().size(); @@ -283,7 +291,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed generateReport(); logger.info("...done!"); - if (RAC.RECAL_PDF != null) { + if (RAC.RECAL_PDF_FILE != null) { logger.info("Generating recalibration plots..."); generatePlots(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index e230817ec..d08239b96 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -59,10 +59,11 @@ public class RecalibrationArgumentCollection { * After the header, data records occur one per line until the end of the file. The first several items on a line are the * values of the individual covariates and will change depending on which covariates were specified at runtime. The last * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, - * and the raw empirical quality score calculated by phred-scaling the mismatch rate. + * and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out. */ @Gather(BQSRGatherer.class) @Output(doc = "The output recalibration table file to create", required = true) + public File RECAL_TABLE_FILE = null; public PrintStream RECAL_TABLE; /** @@ -70,14 +71,14 @@ public class RecalibrationArgumentCollection { * However, we *highly* recommend that users generate these plots whenever possible for QC checking. */ @Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false) - public PrintStream RECAL_PDF = null; + public File RECAL_PDF_FILE = null; /** * If not provided, then a temporary file is created and then deleted upon completion. */ @Hidden - @Output(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false) - public PrintStream RECAL_CSV = null; + @Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false) + public File RECAL_CSV_FILE = null; /** * List all implemented covariates. @@ -181,7 +182,7 @@ public class RecalibrationArgumentCollection { @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") public String FORCE_PLATFORM = null; - public File recalibrationReport = null; + public File existingRecalibrationReport = null; public GATKReportTable generateReportTable(final String covariateNames) { GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); @@ -214,7 +215,9 @@ public class RecalibrationArgumentCollection { argumentsTable.addRowID("quantizing_levels", true); argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); argumentsTable.addRowID("recalibration_report", true); - argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); + argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath()); + argumentsTable.addRowID("plot_pdf_file", true); + argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath()); argumentsTable.addRowID("binary_tag_name", true); argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); return argumentsTable; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 980ca715b..ca490789f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -366,9 +366,9 @@ public class RecalUtils { final RScriptExecutor executor = new RScriptExecutor(); executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); - //executor.addArgs(RAC.RECAL_CSV.getAbsolutePath()); - //executor.addArgs(RAC.RECAL_TABLE.getAbsolutePath()); - //executor.addArgs(RAC.RECAL_PDF.getAbsolutePath()); + executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath()); + executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath()); + executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath()); executor.exec(); } @@ -377,20 +377,20 @@ public class RecalUtils { } public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) { - File temporaryFile = null; - if ( RAC.RECAL_CSV == null ) { - try { - temporaryFile = File.createTempFile("BQSR", ".csv"); - temporaryFile.deleteOnExit(); - RAC.RECAL_CSV = new PrintStream(temporaryFile); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(temporaryFile, "Temporary csv file " + temporaryFile + " could not be created because " + e.getMessage()); + final PrintStream csvFile; + try { + if ( RAC.RECAL_CSV_FILE == null ) { + RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv"); + RAC.RECAL_CSV_FILE.deleteOnExit(); } + csvFile = new PrintStream(RAC.RECAL_CSV_FILE); + } catch (IOException e) { + throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e); } if ( recalibrated != null ) - writeCSV(RAC.RECAL_CSV, recalibrated, "RECALIBRATED", requestedCovariates, true); - writeCSV(RAC.RECAL_CSV, original, "ORIGINAL", requestedCovariates, recalibrated == null); + writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true); + writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null); outputRecalibrationPlot(RAC); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index b22956b4a..41b07832c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -285,7 +285,10 @@ public class RecalibrationReport { RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value); else if (argument.equals("recalibration_report")) - RAC.recalibrationReport = (value == null) ? null : new File((String) value); + RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value); + + else if (argument.equals("plot_pdf_file")) + RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value); else if (argument.equals("binary_tag_name")) RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;