This updates the script to produce a more tearsheet-like output for sample set statistics. Formatting will be updated for aesthetic improvements. There are also several database options that currently pull out misleading information because of changes in sequencing methodology that will be updated to show correct information. Eventually, plot formatting will be updated as well and additional informative plots will be added.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4988 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ffae7bf537
commit
6b5474a00a
|
|
@ -1,11 +1,27 @@
|
||||||
|
##put titles/rownames left
|
||||||
|
##make titles blue
|
||||||
|
##decrease margins below titles
|
||||||
|
## put row names in black
|
||||||
|
##put background rows in.
|
||||||
|
##change layouts so that it looks better
|
||||||
|
##get sample numbers in correctly
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.libPaths('~/Sting/R/') #but make sure this is universal
|
||||||
|
#put use scripts here
|
||||||
|
|
||||||
|
suppressMessages(library(gplots));
|
||||||
|
suppressMessages(library(ReadImages));
|
||||||
|
|
||||||
suppressMessages(library(gsalib));
|
suppressMessages(library(gsalib));
|
||||||
suppressMessages(library(ROracle));
|
suppressMessages(library(ROracle));
|
||||||
|
|
||||||
cmdargs = getargs(
|
cmdargs = gsa.getargs(
|
||||||
list(
|
list(
|
||||||
bamlist = list(value=NA, doc="list of BAM files"),
|
bamlist = list(value=NA, doc="list of BAM files"),
|
||||||
evalroot = list(value=NA, doc="VariantEval root"),
|
evalroot = list(value=NA, doc="VariantEval root"),
|
||||||
statsout = list(value=NA, doc="Output path for stats"),
|
tearxsout = list(value=NA, doc="Output path for tearsheet PDF"),
|
||||||
plotout = list(value=NA, doc="Output path for PDF")
|
plotout = list(value=NA, doc="Output path for PDF")
|
||||||
),
|
),
|
||||||
doc="Creates a tearsheet"
|
doc="Creates a tearsheet"
|
||||||
|
|
@ -54,110 +70,127 @@ dproj = d[which(squid_fclanes %in% fclanes),];
|
||||||
d2proj = d2[which(d2$"Project" %in% unique(dproj$"Project") & d2$"Sample" %in% dproj$"External ID"),];
|
d2proj = d2[which(d2$"Project" %in% unique(dproj$"Project") & d2$"Sample" %in% dproj$"External ID"),];
|
||||||
#}
|
#}
|
||||||
|
|
||||||
## Tear sheet components
|
tearsheetdrop <- "tearsheetdrop.jpg" #put the path to the tearsheet backdrop here
|
||||||
|
|
||||||
|
tearsheet<-function(){
|
||||||
|
|
||||||
|
pdf(file= cmdargs$statsout, width=22, height=15, pagecentre=TRUE, pointsize=24)
|
||||||
|
|
||||||
|
#define layout
|
||||||
|
layout(matrix(c(1,1,2,4,3,5), ncol=2, nrow=3, byrow=TRUE), heights=c(1, 2.5,2.5), respect=FALSE)
|
||||||
|
|
||||||
|
#prep for title bar
|
||||||
|
full<-strsplit(cmdargs$evalroot, "/")
|
||||||
|
name<-strsplit(full[[1]][length(full[[1]])], ".",fixed=TRUE)[[1]][1]
|
||||||
|
title=paste(name, ": TEAR SHEET", sep="")
|
||||||
|
drop<-read.jpeg(system.file(tearsheetdrop, package="gsalib"))
|
||||||
|
|
||||||
|
#plot title bar
|
||||||
|
par(mar=c(0,0,0,0))
|
||||||
|
plot(drop)
|
||||||
|
text(110, 45, title, family="serif", adj=c(0,0), cex=3, col=gray(.25))
|
||||||
|
|
||||||
|
|
||||||
# Project summary
|
# Project summary
|
||||||
projects = paste(unique(dproj$"Project"), collapse=", ");
|
projects = paste(unique(dproj$"Project"), collapse=", ");
|
||||||
cat(sprintf("Project Summary (%s)\n", projects), file=cmdargs$statsout, append=FALSE);
|
|
||||||
|
|
||||||
used_samples = length(unique(dproj$"External ID"));
|
used_samples = length(unique(dproj$"External ID"));
|
||||||
cat(sprintf("\tUsed samples: %s\n", used_samples), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
unused_samples = 0;
|
unused_samples = 0;
|
||||||
cat(sprintf("\tUnused samples: %s\n", unused_samples), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
sequencing_protocol = "Hybrid selection";
|
sequencing_protocol = "Hybrid selection"; #can this be extracted?
|
||||||
cat(sprintf("\tSequencing protocol: %s\n", sequencing_protocol), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
bait_design = paste(unique(dproj$"Bait Set"), collapse=", ");
|
bait_design = paste(unique(dproj$"Bait Set"), collapse=", ");
|
||||||
cat(sprintf("\tBait design: %s\n", bait_design), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
callable_target = paste(unique(dproj$"Target Territory"), collapse=", ");
|
callable_target = paste(unique(dproj$"Target Territory"), collapse=", ");
|
||||||
cat(sprintf("\tCallable target: %s\n", callable_target), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
# Bases summary per lane
|
table1<-rbind(paste(used_samples," used samples/", unused_samples + used_samples," total samples", sep=""), sequencing_protocol, bait_design, callable_target)
|
||||||
cat("\nBases summary (excluding unused lanes/samples)\n", file=cmdargs$statsout, append=TRUE);
|
print(nrow(table1))
|
||||||
|
rownames(table1)<-c("Samples","Sequencing Protocol", "Bait Design","Callable Target")
|
||||||
|
par(mar=c(4,4,4,4))
|
||||||
|
textplot(table1, col.rownames="darkblue", show.colnames=FALSE, cex=1.25)
|
||||||
|
title(main=sprintf("Project Summary (%s)\n", projects), family="sans", cex.main=1.75)
|
||||||
|
|
||||||
cat("\tBases summary per lane\n", file=cmdargs$statsout, append=TRUE);
|
# Bases summary
|
||||||
|
|
||||||
reads_per_lane_mean = mean(dproj$"PF Reads (HS)");
|
reads_per_lane_mean = signif(mean(dproj$"PF Reads (HS)"), 3);
|
||||||
reads_per_lane_sd = sd(dproj$"PF Reads (HS)");
|
reads_per_lane_sd = signif(sd(dproj$"PF Reads (HS)"), 3);
|
||||||
cat(sprintf("\t\tReads: %s +/- %s\n", reads_per_lane_mean, reads_per_lane_sd), file=cmdargs$statsout, append=TRUE);
|
lanes<-sprintf("%s +/- %s\n", reads_per_lane_mean, reads_per_lane_sd)
|
||||||
|
|
||||||
used_bases_per_lane_mean = mean(dproj$"PF HQ Aligned Q20 Bases");
|
used_bases_per_lane_mean = signif(mean(dproj$"PF HQ Aligned Q20 Bases"), 3);
|
||||||
used_bases_per_lane_sd = sd(dproj$"PF HQ Aligned Q20 Bases");
|
used_bases_per_lane_sd = signif(sd(dproj$"PF HQ Aligned Q20 Bases"), 3);
|
||||||
cat(sprintf("\t\tUsed bases: %s +/- %s\n", used_bases_per_lane_mean, used_bases_per_lane_sd), file=cmdargs$statsout, append=TRUE);
|
lanes<-c(lanes, sprintf("%s +/- %s\n", used_bases_per_lane_mean, used_bases_per_lane_sd));
|
||||||
|
|
||||||
target_coverage_mean = mean(na.omit(dproj$"Mean Target Coverage"));
|
target_coverage_mean = mean(na.omit(dproj$"Mean Target Coverage"));
|
||||||
target_coverage_sd = sd(na.omit(dproj$"Mean Target Coverage"));
|
target_coverage_sd = sd(na.omit(dproj$"Mean Target Coverage"));
|
||||||
cat(sprintf("\t\tTarget coverage: %0.2fx +/- %0.2fx\n", target_coverage_mean, target_coverage_sd), file=cmdargs$statsout, append=TRUE);
|
lanes<-c(lanes, sprintf("%0.2fx +/- %0.2fx\n", target_coverage_mean, target_coverage_sd));
|
||||||
|
|
||||||
pct_loci_gt_10x_mean = mean(na.omit(dproj$"Target Bases 10x %"));
|
pct_loci_gt_10x_mean = mean(na.omit(dproj$"Target Bases 10x %"));
|
||||||
pct_loci_gt_10x_sd = sd(na.omit(dproj$"Target Bases 10x %"));
|
pct_loci_gt_10x_sd = sd(na.omit(dproj$"Target Bases 10x %"));
|
||||||
cat(sprintf("\t\t%% loci > 10x covered: %0.2f%% +/- %0.2f%%\n", pct_loci_gt_10x_mean, pct_loci_gt_10x_sd), file=cmdargs$statsout, append=TRUE);
|
lanes<-c(lanes, sprintf("%0.2f%% +/- %0.2f%%\n", pct_loci_gt_10x_mean, pct_loci_gt_10x_sd));
|
||||||
|
|
||||||
pct_loci_gt_20x_mean = mean(na.omit(dproj$"Target Bases 20x %"));
|
pct_loci_gt_20x_mean = mean(na.omit(dproj$"Target Bases 20x %"));
|
||||||
pct_loci_gt_20x_sd = sd(na.omit(dproj$"Target Bases 20x %"));
|
pct_loci_gt_20x_sd = sd(na.omit(dproj$"Target Bases 20x %"));
|
||||||
cat(sprintf("\t\t%% loci > 20x covered: %0.2f%% +/- %0.2f%%\n", pct_loci_gt_20x_mean, pct_loci_gt_20x_sd), file=cmdargs$statsout, append=TRUE);
|
lanes<-c(lanes,sprintf("%0.2f%% +/- %0.2f%%\n", pct_loci_gt_20x_mean, pct_loci_gt_20x_sd));
|
||||||
|
|
||||||
pct_loci_gt_30x_mean = mean(na.omit(dproj$"Target Bases 30x %"));
|
pct_loci_gt_30x_mean = mean(na.omit(dproj$"Target Bases 30x %"));
|
||||||
pct_loci_gt_30x_sd = sd(na.omit(dproj$"Target Bases 30x %"));
|
pct_loci_gt_30x_sd = sd(na.omit(dproj$"Target Bases 30x %"));
|
||||||
cat(sprintf("\t\t%% loci > 30x covered: %0.2f%% +/- %0.2f%%\n", pct_loci_gt_30x_mean, pct_loci_gt_30x_sd), file=cmdargs$statsout, append=TRUE);
|
lanes<-c(lanes,sprintf("%0.2f%% +/- %0.2f%%\n", pct_loci_gt_30x_mean, pct_loci_gt_30x_sd));
|
||||||
|
|
||||||
|
|
||||||
cat("\tBases summary per sample\n", file=cmdargs$statsout, append=TRUE);
|
reads_per_sample_mean = signif(mean(d2proj$"PF Reads"), 3);
|
||||||
|
reads_per_sample_sd = signif(sd(d2proj$"PF Reads"), 3);
|
||||||
|
samps<-sprintf("%s +/- %s\n", reads_per_sample_mean, reads_per_sample_sd);
|
||||||
|
|
||||||
reads_per_sample_mean = mean(d2proj$"PF Reads");
|
used_bases_per_sample_mean = signif(mean(d2proj$"PF HQ Aligned Q20 Bases"), 3);
|
||||||
reads_per_sample_sd = sd(d2proj$"PF Reads");
|
used_bases_per_sample_sd = signif(sd(d2proj$"PF HQ Aligned Q20 Bases"), 3);
|
||||||
cat(sprintf("\t\tReads: %s +/- %s\n", reads_per_sample_mean, reads_per_sample_sd), file=cmdargs$statsout, append=TRUE);
|
samps<-c(samps, sprintf("%s +/- %s\n", used_bases_per_sample_mean, used_bases_per_sample_sd));
|
||||||
|
|
||||||
used_bases_per_sample_mean = mean(d2proj$"PF HQ Aligned Q20 Bases");
|
|
||||||
used_bases_per_sample_sd = sd(d2proj$"PF HQ Aligned Q20 Bases");
|
|
||||||
cat(sprintf("\t\tUsed bases: %s +/- %s\n", used_bases_per_sample_mean, used_bases_per_sample_sd), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
target_coverage_mean = mean(na.omit(d2proj$"Mean Target Coverage"));
|
target_coverage_mean = mean(na.omit(d2proj$"Mean Target Coverage"));
|
||||||
target_coverage_sd = sd(na.omit(d2proj$"Mean Target Coverage"));
|
target_coverage_sd = sd(na.omit(d2proj$"Mean Target Coverage"));
|
||||||
cat(sprintf("\t\tTarget coverage: %0.2fx +/- %0.2fx\n", target_coverage_mean, target_coverage_sd), file=cmdargs$statsout, append=TRUE);
|
samps<-c(samps, sprintf("%0.2fx +/- %0.2fx\n", target_coverage_mean, target_coverage_sd));
|
||||||
|
|
||||||
pct_loci_gt_10x_mean = mean(na.omit(d2proj$"Target Bases 10x %"));
|
pct_loci_gt_10x_mean = mean(na.omit(d2proj$"Target Bases 10x %"));
|
||||||
pct_loci_gt_10x_sd = sd(na.omit(d2proj$"Target Bases 10x %"));
|
pct_loci_gt_10x_sd = sd(na.omit(d2proj$"Target Bases 10x %"));
|
||||||
cat(sprintf("\t\t%% loci > 10x covered: %0.2f%% +/- %0.2f%%\n", pct_loci_gt_10x_mean, pct_loci_gt_10x_sd), file=cmdargs$statsout, append=TRUE);
|
samps<-c(samps, sprintf("%0.2f%% +/- %0.2f%%\n", pct_loci_gt_10x_mean, pct_loci_gt_10x_sd));
|
||||||
|
|
||||||
pct_loci_gt_20x_mean = mean(na.omit(d2proj$"Target Bases 20x %"));
|
pct_loci_gt_20x_mean = mean(na.omit(d2proj$"Target Bases 20x %"));
|
||||||
pct_loci_gt_20x_sd = sd(na.omit(d2proj$"Target Bases 20x %"));
|
pct_loci_gt_20x_sd = sd(na.omit(d2proj$"Target Bases 20x %"));
|
||||||
cat(sprintf("\t\t%% loci > 20x covered: %0.2f%% +/- %0.2f%%\n", pct_loci_gt_20x_mean, pct_loci_gt_20x_sd), file=cmdargs$statsout, append=TRUE);
|
samps<-c(samps, sprintf("%0.2f%% +/- %0.2f%%\n", pct_loci_gt_20x_mean, pct_loci_gt_20x_sd));
|
||||||
|
|
||||||
pct_loci_gt_30x_mean = mean(na.omit(d2proj$"Target Bases 30x %"));
|
pct_loci_gt_30x_mean = mean(na.omit(d2proj$"Target Bases 30x %"));
|
||||||
pct_loci_gt_30x_sd = sd(na.omit(d2proj$"Target Bases 30x %"));
|
pct_loci_gt_30x_sd = sd(na.omit(d2proj$"Target Bases 30x %"));
|
||||||
cat(sprintf("\t\t%% loci > 30x covered: %0.2f%% +/- %0.2f%%\n", pct_loci_gt_30x_mean, pct_loci_gt_30x_sd), file=cmdargs$statsout, append=TRUE);
|
samps<-c(samps, sprintf("%0.2f%% +/- %0.2f%%\n", pct_loci_gt_30x_mean, pct_loci_gt_30x_sd));
|
||||||
|
|
||||||
# Sequencing summary
|
table2<-cbind(lanes, samps)
|
||||||
|
colnames(table2)<-c("Per lane", "Per sample")
|
||||||
|
print(nrow(table2))
|
||||||
|
|
||||||
|
rownames(table2)<-c("Reads", "Used bases", "Average target coverage", "% loci covered to 10x", "% loci covered to 20x","% loci covered to 30x")
|
||||||
|
par(mar=c(4,4,4,4))
|
||||||
|
textplot(table2, rmar=1, col.rownames="dark blue", cex=1.25)
|
||||||
|
title(main="Bases Summary", family="sans", cex.main=1.75)
|
||||||
cat("\nSequencing summary\n", file=cmdargs$statsout, append=TRUE);
|
cat("\nSequencing summary\n", file=cmdargs$statsout, append=TRUE);
|
||||||
|
|
||||||
instrument = "Illumina GA2";
|
|
||||||
cat(sprintf("\tSequencer: %s\n", instrument), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
|
# Sequencing summary
|
||||||
|
|
||||||
|
instrument = "Illumina GA2";#Can we get this?
|
||||||
used_lanes = nrow(dproj);
|
used_lanes = nrow(dproj);
|
||||||
cat(sprintf("\tUsed lanes: %s\n", used_lanes), file=cmdargs$statsout, append=TRUE);
|
unused_lanes_by_sequencing = 0; #can we get this?
|
||||||
|
|
||||||
unused_lanes_by_sequencing = 0;
|
|
||||||
unused_lanes_by_analysis = 0;
|
unused_lanes_by_analysis = 0;
|
||||||
cat(sprintf("\tUnused lanes: %s rejected by sequencing, %s by analysis\n", unused_lanes_by_sequencing, unused_lanes_by_analysis), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
lanes_per_sample_mean = mean(table(dproj$"External ID"));
|
lanes_per_sample_mean = mean(table(dproj$"External ID"));
|
||||||
lanes_per_sample_sd = sd(table(dproj$"External ID"));
|
lanes_per_sample_sd = sd(table(dproj$"External ID"));
|
||||||
lanes_per_sample_median = median(table(dproj$"External ID"));
|
lanes_per_sample_median = median(table(dproj$"External ID"));
|
||||||
cat(sprintf("\tLanes/sample: %0.1f +/- %0.1f lanes (median=%0.1f)\n", lanes_per_sample_mean, lanes_per_sample_sd, lanes_per_sample_median), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
lanes_paired = nrow(subset(dproj, dproj$"Lane Type" == "Paired"));
|
lanes_paired = nrow(subset(dproj, dproj$"Lane Type" == "Paired"));
|
||||||
lanes_widowed = nrow(subset(dproj, dproj$"Lane Type" == "Widowed"));
|
lanes_widowed = nrow(subset(dproj, dproj$"Lane Type" == "Widowed"));
|
||||||
lanes_single = nrow(subset(dproj, dproj$"Lane Type" == "Single"));
|
lanes_single = nrow(subset(dproj, dproj$"Lane Type" == "Single"));
|
||||||
cat(sprintf("\tLane parities: %s paired, %s widowed, %s single\n", lanes_paired, lanes_widowed, lanes_single), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
read_length_mean = mean(dproj$"Mean Read Length (P)");
|
read_length_mean = mean(dproj$"Mean Read Length (P)");
|
||||||
read_length_sd = sd(dproj$"Mean Read Length (P)");
|
read_length_sd = sd(dproj$"Mean Read Length (P)");
|
||||||
read_length_median = median(dproj$"Mean Read Length (P)");
|
read_length_median = median(dproj$"Mean Read Length (P)");
|
||||||
cat(sprintf("\tRead length: %0.1f +/- %0.1f bases (median=%0.1f)\n", read_length_mean, read_length_sd, read_length_median), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
date = dproj$"Run Date";
|
date = dproj$"Run Date";
|
||||||
date = sub("JAN", "01", date);
|
date = sub("JAN", "01", date);
|
||||||
|
|
@ -176,10 +209,17 @@ date = date[order(as.Date(date, format="%d-%m-%Y"))];
|
||||||
|
|
||||||
start_date = date[1];
|
start_date = date[1];
|
||||||
end_date = date[length(date)];
|
end_date = date[length(date)];
|
||||||
cat(sprintf("\tSequencing dates: %s to %s\n", start_date, end_date), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
|
table3<-rbind(instrument, used_lanes, sprintf("%s rejected by sequencing, %s by analysis\n", unused_lanes_by_sequencing, unused_lanes_by_analysis), sprintf("%0.1f +/- %0.1f lanes (median=%0.1f)\n", lanes_per_sample_mean, lanes_per_sample_sd, lanes_per_sample_median), sprintf("%s paired, %s widowed, %s single\n", lanes_paired, lanes_widowed, lanes_single), sprintf("%0.1f +/- %0.1f bases (median=%0.1f)\n", read_length_mean, read_length_sd, read_length_median), sprintf("\tSequencing dates: %s to %s\n", start_date, end_date))
|
||||||
|
print(nrow(table3))
|
||||||
|
|
||||||
|
rownames(table3)<-c("Sequencer", "Used lanes", "Unused lanes","Used lanes/sample", "Lane pariteies", "Read legnths", "Sequencing dates")
|
||||||
|
par(mar=c(4,4,4,4))
|
||||||
|
textplot(table3, rmar=1, col.rownames="dark blue", show.colnames=FALSE, cex=1.25)
|
||||||
|
title(main="Sequencing Summary", family="sans", cex.main=1.75)
|
||||||
|
|
||||||
# Variant summary
|
# Variant summary
|
||||||
cat("\nVariant summary\n", file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
eval.counts = read.csv(paste(cmdargs$evalroot, ".Count_Variants.csv", sep=""), header=TRUE, comment.char="#");
|
eval.counts = read.csv(paste(cmdargs$evalroot, ".Count_Variants.csv", sep=""), header=TRUE, comment.char="#");
|
||||||
eval.counts.called = subset(eval.counts, evaluation_name == "eval" & comparison_name == "dbsnp" & jexl_expression == "none" & filter_name == "called");
|
eval.counts.called = subset(eval.counts, evaluation_name == "eval" & comparison_name == "dbsnp" & jexl_expression == "none" & filter_name == "called");
|
||||||
|
|
@ -193,17 +233,24 @@ eval.titv.called.all = subset(eval.titv.called, novelty_name == "all")$ti.tv_rat
|
||||||
eval.titv.called.known = subset(eval.titv.called, novelty_name == "known")$ti.tv_ratio;
|
eval.titv.called.known = subset(eval.titv.called, novelty_name == "known")$ti.tv_ratio;
|
||||||
eval.titv.called.novel = subset(eval.titv.called, novelty_name == "novel")$ti.tv_ratio;
|
eval.titv.called.novel = subset(eval.titv.called, novelty_name == "novel")$ti.tv_ratio;
|
||||||
|
|
||||||
vars = matrix(c(eval.counts.called.all, eval.counts.called.known, eval.counts.called.novel, eval.titv.called.all, eval.titv.called.known, eval.titv.called.novel, "3.0 - 3.2", "3.2 - 3.4", "2.7 - 3.0"), nrow=3);
|
table4 = matrix(c(eval.counts.called.all, eval.counts.called.known, eval.counts.called.novel, eval.titv.called.all, eval.titv.called.known, eval.titv.called.novel, "3.0 - 3.2", "3.2 - 3.4", "2.7 - 3.0"), nrow=3);
|
||||||
rownames(vars) = c("All", "Known", "Novel");
|
print(nrow(table4))
|
||||||
colnames(vars) = c("Found", "Ti/Tv ratio", "Expected Ti/Tv ratio");
|
|
||||||
|
|
||||||
cat(sprintf("\t\tFound\tTi/Tv ratio\tExpected Ti/Tv ratio\n"), file=cmdargs$statsout, append=TRUE);
|
rownames(table4) = c("All", "Known", "Novel");
|
||||||
cat(sprintf("\tAll\t%s\t%s\t\t%s\n", eval.counts.called.all, eval.titv.called.all, "3.0 - 3.2"), file=cmdargs$statsout, append=TRUE);
|
colnames(table4) = c("Found", "Ti/Tv ratio", "Expected Ti/Tv ratio");
|
||||||
cat(sprintf("\tKnown\t%s\t%s\t\t%s\n", eval.counts.called.known, eval.titv.called.known, "3.2 - 3.4"), file=cmdargs$statsout, append=TRUE);
|
|
||||||
cat(sprintf("\tNovel\t%s\t%s\t\t%s\n", eval.counts.called.novel, eval.titv.called.novel, "2.7 - 3.0"), file=cmdargs$statsout, append=TRUE);
|
|
||||||
|
|
||||||
|
textplot(table4, rmar=1, col.rownames="dark blue", cex=1.25)
|
||||||
|
title(main="Variant Summary", family="sans", cex.main=1.75)
|
||||||
|
|
||||||
|
dev.off()
|
||||||
|
}
|
||||||
|
|
||||||
|
tearsheet()
|
||||||
|
|
||||||
# Plots
|
# Plots
|
||||||
|
plots<-function(){
|
||||||
eval.bysample = read.csv(paste(cmdargs$evalroot, ".SimpleMetricsBySample.csv", sep=""), header=TRUE, comment.char="#");
|
eval.bysample = read.csv(paste(cmdargs$evalroot, ".SimpleMetricsBySample.csv", sep=""), header=TRUE, comment.char="#");
|
||||||
eval.bysample.called = subset(eval.bysample, evaluation_name == "eval" & comparison_name == "dbsnp" & jexl_expression == "none" & filter_name == "called");
|
eval.bysample.called = subset(eval.bysample, evaluation_name == "eval" & comparison_name == "dbsnp" & jexl_expression == "none" & filter_name == "called");
|
||||||
eval.bysample.called.all = subset(eval.bysample.called, novelty_name == "all");
|
eval.bysample.called.all = subset(eval.bysample.called, novelty_name == "all");
|
||||||
|
|
@ -243,3 +290,6 @@ points(eval.func.called.novel$Synonymous[ind] / (eval.func.called.novel$Missense
|
||||||
legend("topright", c("All", "Known", "Novel"), col=c("black", "blue", "red"), pt.cex=1.3, pch=21);
|
legend("topright", c("All", "Known", "Novel"), col=c("black", "blue", "red"), pt.cex=1.3, pch=21);
|
||||||
|
|
||||||
dev.off();
|
dev.off();
|
||||||
|
}
|
||||||
|
|
||||||
|
plots()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue