Complete overhaul of previous commits to make it all work with scatter-gather. Now tracks output files correctly and can print to stdout.
This commit is contained in:
parent
4bb7a99f08
commit
d94d0d15c2
|
|
@ -49,21 +49,21 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "5a28b9fb5f2e36703e9804d276c38009")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "646a7c6db12cf0ec119bc27abed9c7b8")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "777f21676435837ba470497e17624266")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f7d77e0d86d033c69f25ef9858fdb95d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "c3866646833cbb60831695d016d614d1")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "04c1d020bdb25fc55c3983748702290c")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "edf77f41cdd6c27f987cb1ecbcaa889b")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3d52db844e8220d2dbdcd1339b3d3000")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "47605edafb4da0859bf735a6bd2dfe9c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0ac92d3548fdca8f253121842bb38c65")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "de7448f5bf787c17f1ee4c415bc90d3c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "60542fe8a3cc89a47421767c6e1c11cd")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "f9a5a8f1b8f77f4c8857ccba8bff49a6")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3d52db844e8220d2dbdcd1339b3d3000")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "919d88b173b0c11cbca762132bc94ab9")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "be6c7bc0b79a2d0395d21cd0154540d5")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "65781095beb41d8feca26e93e04dcc0b")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8ee1fed1713daca1f36e8b30bee2cd23")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "9449d8a8baac742f46673e9b8314220b")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "39313c6e3b85142548fee9b6c130e7b6")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "15eae9e834ed80b24660393c6df87f85")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "8485d8fd5e780e98d720dfbf79f26528")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c423d1d443822dae404239bb9a746b96")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "fb0a6aef430f562ed5e0002d03e0c619")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "efee7bcb89abe36da1cfd8a635d37cd2")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "0e8a3238902a1ff0f0c657fb09b4c022")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5e58d3dcf5ca38f008a64d1c0743ed83")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "1a8e5c85c7935eb1bd2203f5c86ce1db")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c423d1d443822dae404239bb9a746b96")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "6762b39dc027056365280a9d582a6713")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -732,21 +732,6 @@ public class GenomeAnalysisEngine {
|
|||
outputs.add(stub);
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterates over all registered output stubs and tries to find the one representing the given OutputStream.
|
||||
*
|
||||
* @param output the stream to check for
|
||||
* @return the file associated with the given stream/stub if available, null otherwise
|
||||
*/
|
||||
public File getFilenameFromAssociatedOutputStream(final OutputStream output) {
|
||||
// Walk every stub held in 'outputs' looking for the one that backs the given stream.
for ( final Stub<?> stub : outputs ) {
|
||||
// Reference comparison (==): only the very same stream object counts as a match.
if ( stub.getOutputStream() == output )
|
||||
return stub.getOutputFile();
|
||||
}
|
||||
|
||||
// No stub in 'outputs' exposes this stream.
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the tag associated with a given command-line argument.
|
||||
* @param key Object for which to inspect the tag.
|
||||
|
|
|
|||
|
|
@ -70,12 +70,15 @@ public class BQSRGatherer extends Gatherer {
|
|||
generalReport.calculateQuantizedQualities();
|
||||
|
||||
RecalibrationArgumentCollection RAC = generalReport.getRAC();
|
||||
if (RAC.recalibrationReport != null && RAC.RECAL_PDF != null) {
|
||||
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates());
|
||||
}
|
||||
else if (RAC.RECAL_PDF != null) {
|
||||
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates());
|
||||
if ( RAC.RECAL_PDF_FILE != null ) {
|
||||
RAC.RECAL_TABLE_FILE = output;
|
||||
if ( RAC.existingRecalibrationReport != null ) {
|
||||
final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates());
|
||||
}
|
||||
else {
|
||||
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates());
|
||||
}
|
||||
}
|
||||
|
||||
generalReport.output(outputFile);
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
|
@ -149,7 +151,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
RecalUtils.listAvailableCovariates(logger);
|
||||
System.exit(0);
|
||||
}
|
||||
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
RAC.existingRecalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
|
|
@ -168,6 +170,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
}
|
||||
|
||||
try {
|
||||
RAC.RECAL_TABLE = new PrintStream(RAC.RECAL_TABLE_FILE);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
|
||||
}
|
||||
|
||||
int numReadGroups = 0;
|
||||
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
|
||||
numReadGroups += header.getReadGroups().size();
|
||||
|
|
@ -283,7 +291,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
if (RAC.RECAL_PDF != null) {
|
||||
if (RAC.RECAL_PDF_FILE != null) {
|
||||
logger.info("Generating recalibration plots...");
|
||||
generatePlots();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,10 +59,11 @@ public class RecalibrationArgumentCollection {
|
|||
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||
* three items are the data, that is, the number of observations for this combination of covariates, the number of reference mismatches,
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out.
|
||||
*/
|
||||
@Gather(BQSRGatherer.class)
|
||||
@Output(doc = "The output recalibration table file to create", required = true)
|
||||
public File RECAL_TABLE_FILE = null;
|
||||
public PrintStream RECAL_TABLE;
|
||||
|
||||
/**
|
||||
|
|
@ -70,14 +71,14 @@ public class RecalibrationArgumentCollection {
|
|||
* However, we *highly* recommend that users generate these plots whenever possible for QC checking.
|
||||
*/
|
||||
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false)
|
||||
public PrintStream RECAL_PDF = null;
|
||||
public File RECAL_PDF_FILE = null;
|
||||
|
||||
/**
|
||||
* If not provided, then a temporary file is created and then deleted upon completion.
|
||||
*/
|
||||
@Hidden
|
||||
@Output(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
|
||||
public PrintStream RECAL_CSV = null;
|
||||
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
|
||||
public File RECAL_CSV_FILE = null;
|
||||
|
||||
/**
|
||||
* List all implemented covariates.
|
||||
|
|
@ -181,7 +182,7 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
|
||||
public File recalibrationReport = null;
|
||||
public File existingRecalibrationReport = null;
|
||||
|
||||
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
|
|
@ -214,7 +215,9 @@ public class RecalibrationArgumentCollection {
|
|||
argumentsTable.addRowID("quantizing_levels", true);
|
||||
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("plot_pdf_file", true);
|
||||
argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
|
|
|
|||
|
|
@ -366,9 +366,9 @@ public class RecalUtils {
|
|||
|
||||
final RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
|
||||
//executor.addArgs(RAC.RECAL_CSV.getAbsolutePath());
|
||||
//executor.addArgs(RAC.RECAL_TABLE.getAbsolutePath());
|
||||
//executor.addArgs(RAC.RECAL_PDF.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath());
|
||||
executor.exec();
|
||||
}
|
||||
|
||||
|
|
@ -377,20 +377,20 @@ public class RecalUtils {
|
|||
}
|
||||
|
||||
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
|
||||
File temporaryFile = null;
|
||||
if ( RAC.RECAL_CSV == null ) {
|
||||
try {
|
||||
temporaryFile = File.createTempFile("BQSR", ".csv");
|
||||
temporaryFile.deleteOnExit();
|
||||
RAC.RECAL_CSV = new PrintStream(temporaryFile);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(temporaryFile, "Temporary csv file " + temporaryFile + " could not be created because " + e.getMessage());
|
||||
final PrintStream csvFile;
|
||||
try {
|
||||
if ( RAC.RECAL_CSV_FILE == null ) {
|
||||
RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv");
|
||||
RAC.RECAL_CSV_FILE.deleteOnExit();
|
||||
}
|
||||
csvFile = new PrintStream(RAC.RECAL_CSV_FILE);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e);
|
||||
}
|
||||
|
||||
if ( recalibrated != null )
|
||||
writeCSV(RAC.RECAL_CSV, recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||
writeCSV(RAC.RECAL_CSV, original, "ORIGINAL", requestedCovariates, recalibrated == null);
|
||||
writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||
writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null);
|
||||
outputRecalibrationPlot(RAC);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -285,7 +285,10 @@ public class RecalibrationReport {
|
|||
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("recalibration_report"))
|
||||
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
|
||||
RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("plot_pdf_file"))
|
||||
RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("binary_tag_name"))
|
||||
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;
|
||||
|
|
|
|||
Loading…
Reference in New Issue