Expanded the BQSR reporting script
-- Includes header page -- Table of arguments (Arguments) -- Summary of counts (RecalData0) -- Summary of counts by qual (RecalData1) -- Fixed bug in output that resulted in covariates list always being null (updated md5s accordingly) -- BQSR.R now loads all relevant libraries, including gplots, grid, and gsalib, to run correctly
This commit is contained in:
parent
458bbdee8f
commit
243af0adb1
|
|
@ -50,20 +50,20 @@ public class BQSRIntegrationTest extends WalkerTest {
|
||||||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||||
return new Object[][]{
|
return new Object[][]{
|
||||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")},
|
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
|
||||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")},
|
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
|
||||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")},
|
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
|
||||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")},
|
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
|
||||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")},
|
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
|
||||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")},
|
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
|
||||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")},
|
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
|
||||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")},
|
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
|
||||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")},
|
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
|
||||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")},
|
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
|
||||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")},
|
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
|
||||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")},
|
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
|
||||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")},
|
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
|
||||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")},
|
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,18 @@
|
||||||
library("ggplot2")
|
library("ggplot2")
|
||||||
|
library(gplots)
|
||||||
|
library("reshape")
|
||||||
|
library("grid")
|
||||||
library("tools") #For compactPDF in R 2.13+
|
library("tools") #For compactPDF in R 2.13+
|
||||||
|
library(gsalib)
|
||||||
|
|
||||||
args <- commandArgs(TRUE)
|
|
||||||
|
if ( interactive() ) {
|
||||||
|
args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA)
|
||||||
|
} else {
|
||||||
|
args <- commandArgs(TRUE)
|
||||||
|
}
|
||||||
data <- read.csv(args[1])
|
data <- read.csv(args[1])
|
||||||
|
gsa.report <- gsa.read.gatkreport(args[2])
|
||||||
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
|
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
|
||||||
|
|
||||||
numRG = length(unique(data$ReadGroup))
|
numRG = length(unique(data$ReadGroup))
|
||||||
|
|
@ -82,20 +92,45 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
|
||||||
|
|
||||||
p <- ggplot(d, aes(x=CovariateValue)) +
|
p <- ggplot(d, aes(x=CovariateValue)) +
|
||||||
xlab(paste(cov,"Covariate")) +
|
xlab(paste(cov,"Covariate")) +
|
||||||
ylab("Number of Observations") +
|
ylab("No. of Observations (area normalized)") +
|
||||||
blankTheme
|
blankTheme
|
||||||
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
|
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity")
|
||||||
scale_y_continuous(formatter="comma")
|
d <- d + scale_fill_manual(values=c("maroon1","blue"))
|
||||||
|
d <- d + facet_grid(.~EventType)
|
||||||
|
# d <- d + scale_y_continuous(formatter="comma")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pdf(args[2],height=9,width=15)
|
if ( ! is.na(args[3]) )
|
||||||
|
pdf(args[3],height=9,width=15)
|
||||||
|
|
||||||
|
#frame()
|
||||||
|
textplot(gsa.report$Arguments, show.rownames=F)
|
||||||
|
title(
|
||||||
|
main="GATK BaseRecalibration report",
|
||||||
|
sub=date())
|
||||||
|
|
||||||
distributeGraphRows(list(a,b,c), c(1,1,1))
|
distributeGraphRows(list(a,b,c), c(1,1,1))
|
||||||
distributeGraphRows(list(d,e,f), c(1,1,1))
|
distributeGraphRows(list(d,e,f), c(1,1,1))
|
||||||
dev.off()
|
|
||||||
|
|
||||||
|
# format the overall information
|
||||||
|
rt0 <- data.frame(
|
||||||
|
ReadGroup = gsa.report$RecalTable0$ReadGroup,
|
||||||
|
EventType = gsa.report$RecalTable0$EventType,
|
||||||
|
EmpiricalQuality = sprintf("%.1f", gsa.report$RecalTable0$EmpiricalQuality),
|
||||||
|
EstimatedQReported = sprintf("%.1f", gsa.report$RecalTable0$EstimatedQReported),
|
||||||
|
Observations = sprintf("%.2e", gsa.report$RecalTable0$Observations),
|
||||||
|
Errors = sprintf("%.2e", gsa.report$RecalTable0$Errors))
|
||||||
|
textplot(t(rt0), show.colnames=F)
|
||||||
|
title("Overall error rates by event type")
|
||||||
|
|
||||||
if (exists('compactPDF')) {
|
# plot per quality score recalibration table
|
||||||
compactPDF(args[2])
|
textplot(gsa.report$RecalTable1, show.rownames=F)
|
||||||
|
title("Error rates by event type and initial quality score")  # heading for the per-quality-score table plotted just above
|
||||||
|
|
||||||
|
if ( ! is.na(args[3]) ) {
|
||||||
|
dev.off()
|
||||||
|
if (exists('compactPDF')) {
|
||||||
|
compactPDF(args[3])  # compact the PDF opened via pdf(args[3]); args[2] is the GATK report input, not the plot file
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -175,12 +175,12 @@ public class RecalibrationArgumentCollection {
|
||||||
|
|
||||||
public File recalibrationReport = null;
|
public File recalibrationReport = null;
|
||||||
|
|
||||||
public GATKReportTable generateReportTable() {
|
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||||
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||||
argumentsTable.addColumn("Argument");
|
argumentsTable.addColumn("Argument");
|
||||||
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||||
argumentsTable.addRowID("covariate", true);
|
argumentsTable.addRowID("covariate", true);
|
||||||
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
|
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
|
||||||
argumentsTable.addRowID("no_standard_covs", true);
|
argumentsTable.addRowID("no_standard_covs", true);
|
||||||
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||||
argumentsTable.addRowID("run_without_dbsnp", true);
|
argumentsTable.addRowID("run_without_dbsnp", true);
|
||||||
|
|
|
||||||
|
|
@ -326,9 +326,23 @@ public class RecalUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
||||||
outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a human-readable string representing the used covariates
|
||||||
|
*
|
||||||
|
* @param requestedCovariates a vector of covariates
|
||||||
|
* @return a non-null comma-separated string
|
||||||
|
*/
|
||||||
|
public static String covariateNames(final Covariate[] requestedCovariates) {
|
||||||
|
final List<String> names = new ArrayList<String>(requestedCovariates.length);
|
||||||
|
for ( final Covariate cov : requestedCovariates )
|
||||||
|
names.add(cov.getClass().getSimpleName());
|
||||||
|
return Utils.join(",", names);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
||||||
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
||||||
}
|
}
|
||||||
|
|
@ -352,7 +366,7 @@ public class RecalUtils {
|
||||||
return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
|
return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void outputRecalibrationPlot(Pair<PrintStream, File> files, boolean keepIntermediates) {
|
private static void outputRecalibrationPlot(final File gatkReportFilename, Pair<PrintStream, File> files, boolean keepIntermediates) {
|
||||||
final File csvFileName = files.getSecond();
|
final File csvFileName = files.getSecond();
|
||||||
final File plotFileName = new File(csvFileName + ".pdf");
|
final File plotFileName = new File(csvFileName + ".pdf");
|
||||||
files.getFirst().close();
|
files.getFirst().close();
|
||||||
|
|
@ -360,6 +374,7 @@ public class RecalUtils {
|
||||||
final RScriptExecutor executor = new RScriptExecutor();
|
final RScriptExecutor executor = new RScriptExecutor();
|
||||||
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
|
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
|
||||||
executor.addArgs(csvFileName.getAbsolutePath());
|
executor.addArgs(csvFileName.getAbsolutePath());
|
||||||
|
executor.addArgs(gatkReportFilename.getAbsolutePath());
|
||||||
executor.addArgs(plotFileName.getAbsolutePath());
|
executor.addArgs(plotFileName.getAbsolutePath());
|
||||||
executor.exec();
|
executor.exec();
|
||||||
|
|
||||||
|
|
@ -372,14 +387,14 @@ public class RecalUtils {
|
||||||
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
||||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||||
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
|
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
|
||||||
outputRecalibrationPlot(files, keepIntermediates);
|
outputRecalibrationPlot(filename, files, keepIntermediates);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
||||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||||
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
|
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||||
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
|
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
|
||||||
outputRecalibrationPlot(files, keepIntermediates);
|
outputRecalibrationPlot(filename, files, keepIntermediates);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue