Complete overhaul of previous commits to make it all work with scatter-gather. Now tracks output files correctly and can print to stdout.

This commit is contained in:
Eric Banks 2012-09-12 15:15:40 -04:00
parent 4bb7a99f08
commit d94d0d15c2
7 changed files with 60 additions and 58 deletions

View File

@ -49,21 +49,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000"; String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{ return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "5a28b9fb5f2e36703e9804d276c38009")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "be6c7bc0b79a2d0395d21cd0154540d5")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "646a7c6db12cf0ec119bc27abed9c7b8")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "65781095beb41d8feca26e93e04dcc0b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "777f21676435837ba470497e17624266")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8ee1fed1713daca1f36e8b30bee2cd23")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f7d77e0d86d033c69f25ef9858fdb95d")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "9449d8a8baac742f46673e9b8314220b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "c3866646833cbb60831695d016d614d1")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "39313c6e3b85142548fee9b6c130e7b6")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "04c1d020bdb25fc55c3983748702290c")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "15eae9e834ed80b24660393c6df87f85")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "edf77f41cdd6c27f987cb1ecbcaa889b")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "8485d8fd5e780e98d720dfbf79f26528")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3d52db844e8220d2dbdcd1339b3d3000")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c423d1d443822dae404239bb9a746b96")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "47605edafb4da0859bf735a6bd2dfe9c")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "fb0a6aef430f562ed5e0002d03e0c619")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0ac92d3548fdca8f253121842bb38c65")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "efee7bcb89abe36da1cfd8a635d37cd2")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "de7448f5bf787c17f1ee4c415bc90d3c")}, {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "0e8a3238902a1ff0f0c657fb09b4c022")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "60542fe8a3cc89a47421767c6e1c11cd")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5e58d3dcf5ca38f008a64d1c0743ed83")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "f9a5a8f1b8f77f4c8857ccba8bff49a6")}, {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "1a8e5c85c7935eb1bd2203f5c86ce1db")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3d52db844e8220d2dbdcd1339b3d3000")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c423d1d443822dae404239bb9a746b96")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "919d88b173b0c11cbca762132bc94ab9")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "6762b39dc027056365280a9d582a6713")},
}; };
} }

View File

@ -732,21 +732,6 @@ public class GenomeAnalysisEngine {
outputs.add(stub); outputs.add(stub);
} }
/**
 * Looks up the output file that belongs to a given output stream.
 *
 * Each registered output stub is examined in turn; a stub matches when its stream is the very same
 * object instance as the argument (reference comparison, not equals()).
 *
 * @param output the stream whose backing file is wanted
 * @return the output file of the first matching stub, or null when no registered stub owns the stream
 */
public File getFilenameFromAssociatedOutputStream(final OutputStream output) {
    for ( final Stub<?> candidate : outputs ) {
        final boolean ownsStream = candidate.getOutputStream() == output;
        if ( !ownsStream )
            continue;
        // first match wins; later stubs are not inspected
        return candidate.getOutputFile();
    }
    // no stub is associated with this stream
    return null;
}
/** /**
* Returns the tag associated with a given command-line argument. * Returns the tag associated with a given command-line argument.
* @param key Object for which to inspect the tag. * @param key Object for which to inspect the tag.

View File

@ -70,12 +70,15 @@ public class BQSRGatherer extends Gatherer {
generalReport.calculateQuantizedQualities(); generalReport.calculateQuantizedQualities();
RecalibrationArgumentCollection RAC = generalReport.getRAC(); RecalibrationArgumentCollection RAC = generalReport.getRAC();
if (RAC.recalibrationReport != null && RAC.RECAL_PDF != null) { if ( RAC.RECAL_PDF_FILE != null ) {
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport); RAC.RECAL_TABLE_FILE = output;
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates()); if ( RAC.existingRecalibrationReport != null ) {
} final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport);
else if (RAC.RECAL_PDF != null) { RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates());
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates()); }
else {
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates());
}
} }
generalReport.output(outputFile); generalReport.output(outputFile);

View File

@ -50,6 +50,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.util.ArrayList; import java.util.ArrayList;
@ -149,7 +151,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
RecalUtils.listAvailableCovariates(logger); RecalUtils.listAvailableCovariates(logger);
System.exit(0); System.exit(0);
} }
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table RAC.existingRecalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst(); ArrayList<Covariate> requiredCovariates = covariates.getFirst();
@ -168,6 +170,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
} }
try {
RAC.RECAL_TABLE = new PrintStream(RAC.RECAL_TABLE_FILE);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
}
int numReadGroups = 0; int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() ) for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size(); numReadGroups += header.getReadGroups().size();
@ -283,7 +291,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
generateReport(); generateReport();
logger.info("...done!"); logger.info("...done!");
if (RAC.RECAL_PDF != null) { if (RAC.RECAL_PDF_FILE != null) {
logger.info("Generating recalibration plots..."); logger.info("Generating recalibration plots...");
generatePlots(); generatePlots();
} }

View File

@ -59,10 +59,11 @@ public class RecalibrationArgumentCollection {
* After the header, data records occur one per line until the end of the file. The first several items on a line are the * After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last * values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. * and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out.
*/ */
@Gather(BQSRGatherer.class) @Gather(BQSRGatherer.class)
@Output(doc = "The output recalibration table file to create", required = true) @Output(doc = "The output recalibration table file to create", required = true)
public File RECAL_TABLE_FILE = null;
public PrintStream RECAL_TABLE; public PrintStream RECAL_TABLE;
/** /**
@ -70,14 +71,14 @@ public class RecalibrationArgumentCollection {
* However, we *highly* recommend that users generate these plots whenever possible for QC checking. * However, we *highly* recommend that users generate these plots whenever possible for QC checking.
*/ */
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false) @Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false)
public PrintStream RECAL_PDF = null; public File RECAL_PDF_FILE = null;
/** /**
* If not provided, then a temporary file is created and then deleted upon completion. * If not provided, then a temporary file is created and then deleted upon completion.
*/ */
@Hidden @Hidden
@Output(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false) @Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
public PrintStream RECAL_CSV = null; public File RECAL_CSV_FILE = null;
/** /**
* List all implemented covariates. * List all implemented covariates.
@ -181,7 +182,7 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null; public String FORCE_PLATFORM = null;
public File recalibrationReport = null; public File existingRecalibrationReport = null;
public GATKReportTable generateReportTable(final String covariateNames) { public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
@ -214,7 +215,9 @@ public class RecalibrationArgumentCollection {
argumentsTable.addRowID("quantizing_levels", true); argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("recalibration_report", true); argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
argumentsTable.addRowID("plot_pdf_file", true);
argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true); argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable; return argumentsTable;

View File

@ -366,9 +366,9 @@ public class RecalUtils {
final RScriptExecutor executor = new RScriptExecutor(); final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
//executor.addArgs(RAC.RECAL_CSV.getAbsolutePath()); executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath());
//executor.addArgs(RAC.RECAL_TABLE.getAbsolutePath()); executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath());
//executor.addArgs(RAC.RECAL_PDF.getAbsolutePath()); executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath());
executor.exec(); executor.exec();
} }
@ -377,20 +377,20 @@ public class RecalUtils {
} }
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) { public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
File temporaryFile = null; final PrintStream csvFile;
if ( RAC.RECAL_CSV == null ) { try {
try { if ( RAC.RECAL_CSV_FILE == null ) {
temporaryFile = File.createTempFile("BQSR", ".csv"); RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv");
temporaryFile.deleteOnExit(); RAC.RECAL_CSV_FILE.deleteOnExit();
RAC.RECAL_CSV = new PrintStream(temporaryFile);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(temporaryFile, "Temporary csv file " + temporaryFile + " could not be created because " + e.getMessage());
} }
csvFile = new PrintStream(RAC.RECAL_CSV_FILE);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e);
} }
if ( recalibrated != null ) if ( recalibrated != null )
writeCSV(RAC.RECAL_CSV, recalibrated, "RECALIBRATED", requestedCovariates, true); writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true);
writeCSV(RAC.RECAL_CSV, original, "ORIGINAL", requestedCovariates, recalibrated == null); writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null);
outputRecalibrationPlot(RAC); outputRecalibrationPlot(RAC);
} }

View File

@ -285,7 +285,10 @@ public class RecalibrationReport {
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value); RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
else if (argument.equals("recalibration_report")) else if (argument.equals("recalibration_report"))
RAC.recalibrationReport = (value == null) ? null : new File((String) value); RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
else if (argument.equals("plot_pdf_file"))
RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value);
else if (argument.equals("binary_tag_name")) else if (argument.equals("binary_tag_name"))
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value; RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;