#These functions each make a page for the ADPR. They assume a pdf with the following parameters for best formatting: #pdf(file=paste(sample_sets, ".pdf", sep=""), width=22, height=15, pagecentre=TRUE, pointsize=24) library(gplots) library(ReadImages) ##defaults<-par(no.readonly = TRUE) tearsheet<-function(lanetable, sampletable, variant, Protocol, Sequencer){ #define layout layout(matrix(c(1,1,2,4,3,5), ncol=2, nrow=3, byrow=TRUE), heights=c(1, 2.5,2.5,), respect=FALSE) #prep for title bar title=paste(sample_sets, ": TEAR SHEET", sep="") drop<-read.jpeg("tearsheetdrop.jpg") #plot title bar par(mar=c(0,0,0,0)) plot(drop) text(100, 40, title, family="serif", adj=c(0,0), cex=3, col=gray(.25)) #calc by lane stuff sdlane<-rep("NA", 6) meanlane<-sdlane attach(lanetable); callable.target<-HS_TARGET_TERRITORY[1]; singlelanes<-length(which(Lane.Type=="Single")); pairedlanes<-length(which(Lane.Type=="Paired")); meanlane[1]<-round(mean(AL_TOTAL_READS, na.rm=TRUE)/10^6, 2); sdlane[1]<-round(sd(AL_TOTAL_READS, na.rm=TRUE)/10^6, 2); meanlane[2]<-round(mean(HS_ON_TARGET_BASES, na.rm=TRUE)/10^6, 2); sdlane[2]<-round(sd(HS_ON_TARGET_BASES, na.rm=TRUE)/10^6, 2); meanlane[3]<-round(mean(HS_MEAN_TARGET_COVERAGE, na.rm=TRUE)); sdlane[3]<-round(sd(HS_MEAN_TARGET_COVERAGE, na.rm=TRUE)); meanlane[4]<-round(mean(HS_PCT_TARGET_BASES_10X, na.rm=TRUE)); meanlane[5]<-round(mean(HS_PCT_TARGET_BASES_20X, na.rm=TRUE)); meanlane[6]<-round(mean(HS_PCT_TARGET_BASES_30X, na.rm=TRUE)); sdlane[4]<-round(sd(HS_PCT_TARGET_BASES_10X, na.rm=TRUE)); sdlane[5]<-round(sd(HS_PCT_TARGET_BASES_20X, na.rm=TRUE)); sdlane[6]<-round(sd(HS_PCT_TARGET_BASES_30X, na.rm=TRUE)) names<-paste(Flowcell, "-", Lane, sep="") detach(lanetable) meansamp<-rep("NA", 6) sdsamp<-meansamp #Calc by sample metrics attach(bysample); baits<-Bait.Set[1] alllanes<-signif(sum(X..Lanes.included.in.aggregation, na.rm = TRUE)) mean.lanes.samp<-signif(mean(X..Lanes.included.in.aggregation, na.rm = TRUE)); sd.lanes.samp<-signif(sd(X..Lanes.included.in.aggregation, na.rm=TRUE)); mean.mrl.samp<-signif(mean(Mean.Read.Length, na.rm=TRUE)); sd.mrl.samp<-signif(sd(Mean.Read.Length, na.rm=TRUE)); meansamp[1]<-round(mean(Total.Reads, na.rm=TRUE)/10^6, 2); sdsamp[1]<-round(sd(Total.Reads, na.rm=TRUE)/10^6, 2); meansamp[2]<-round(mean(On.Target.Bases..HS., na.rm=TRUE)/10^6, 2); sdsamp[2]<-round(sd(On.Target.Bases..HS., na.rm=TRUE)/10^6, 2); meansamp[3]<-round(mean(Mean.Target.Coverage..HS., na.rm=TRUE)); sdsamp[3]<-round(sd(Mean.Target.Coverage..HS., na.rm=TRUE)); meansamp[4]<-round(mean(PCT.Target.Bases.10x..HS., na.rm=TRUE)); meansamp[5]<-round(mean(PCT.Target.Bases.20x..HS., na.rm=TRUE)); meansamp[6]<-round(mean(PCT.Target.Bases.30x..HS., na.rm=TRUE)); sdsamp[4]<-round(sd(PCT.Target.Bases.10x..HS., na.rm=TRUE)); sdsamp[5]<-round(sd(PCT.Target.Bases.20x..HS., na.rm=TRUE)); sdsamp[6]<-round(sd(PCT.Target.Bases.30x..HS., na.rm=TRUE)); detach(bysample); #calc variant stuff attach(variant) SNPS<-c(ti_count[which(filter_name=="called")]+tv_count[which(filter_name=="called")]) titvs<-c(ti.tv_ratio[which(filter_name=="called")]) detach(variant) #prep stuff. summary<-c(nrow(bysample), Protocol, baits, paste(callable.target, "bases")) summary2<-c(Sequencer, alllanes, paste(mean.lanes.samp, "+/-", sd.lanes.samp), paste(singlelanes, "single lanes,", pairedlanes, "paired lanes"), paste(mean.mrl.samp, "+/-", sd.mrl.samp)) samps<-paste(meansamp, c("M", "M", "x", "%", "%", "%"), " +/- ", sdsamp, c("M", "M", "x", "%", "%", "%"), sep="") lanes<-paste(meanlane, c("M", "M", "x", "%", "%", "%"), " +/- ", sdlane, c("M", "M", "x", "%", "%", "%"), sep="") #print out 4 tables in R table1<-cbind(summary) rownames(table1)<-c("Samples","Sequencing Protocol", "Bait Design","Callable Target") par(mar=c(4,4,4,4)) textplot(table1, col.rownames="darkblue", show.colnames=FALSE, cex=1.75) title(main="Project Summary", family="sans", cex.main=2) table2<-cbind(lanes, samps) colnames(table2)<-c("per lane", "per sample") rownames(table2)<-c("Reads", "Used bases", "Average target coverage", "% loci covered to 10x", "% loci covered to 20x","% loci covered to 10x") par(mar=c(4,4,4,4)) textplot(table2, rmar=1, col.rownames="dark blue", cex=1.25) title(main="Bases Summary", family="sans", cex.main=1.75) table3<-cbind(summary2) rownames(table3)<-c("Sequencer", "Used lanes", "Used lanes per sample", "Lane pariteies", "Read legnths") par(mar=c(4,4,4,4)) textplot(table3, rmar=1, col.rownames="dark blue", show.colnames=FALSE, cex=1.25) title(main="Sequencing Summary", family="sans", cex.main=1.75) table4<-cbind(SNPS, titvs) rownames(table4)<-c("All SNPs", "Known SNPs", "Novel SNPs") colnames(table4)<-c("SNPs Found", "Ti/Tv") textplot(table4, rmar=1, col.rownames="dark blue", cex=1.25) title(main="Variant Summary", family="sans", cex.main=1.75) } fingerprints<-function(lanetable, sample_sets){ attach(lanetable) #define layout layout(matrix(c(1,2,3), ncol=1, nrow=3, byrow=TRUE), heights=c(1, 3,2), respect=FALSE) #prep for title bar title=paste(sample_sets, ": Fingerprint Status", sep="") drop<-read.jpeg("adprdrop.jpg") #plot title bar par(mar=c(0,0,0,0)) plot(drop) text(100, 40, title, family="serif", adj=c(0,0), cex=3, col=gray(.25)) #prep for FP plot badsnps<-union(which(FP_CONFIDENT_MATCHING_SNPS<15), which(FP_CONFIDENT_MATCHING_SNPS<15)) colors<-c(rep("Blue", length(FP_CONFIDENT_CALLS))) colors[badsnps]<-"Red" ticks<-c(match(unique(Flowcell), Flowcell) ) ys=rep(c(0, max(SNP_TOTAL_SNPS, na.rm=TRUE)*1.04, max(SNP_TOTAL_SNPS, na.rm=TRUE)*1.04, 0, 0), ceiling(length(ticks)/2)) shader<-ticks[c(rep(c(1,1,2,2,1), ceiling(length(ticks)/2))+sort(rep(seq(0, length(ticks),by=2), 5)))]-0.5 if((length(ticks)%%2 > 0)){ shader[(length(shader)-2):(length(shader)-1)]<-length(Flowcell)+0.5 } shader<-na.omit(shader) #plot FP plot par(mar=c(10, 6, 8, 3)) plot(1:length(FP_CONFIDENT_MATCHING_SNPS), FP_CONFIDENT_MATCHING_SNPS, pch=NA, ylim=c(0,24), ylab="Fingerprint calls", xlab="", xaxt="n", col=colors, main="Fingerprint Calling and Matching Sorted by Flowcell", cex.main=2) axis(side=3, at=c(1:length(Flowcell)), labels=Lane[order(Flowcell)], cex.axis=0.5, padj=1,tick=FALSE) axis(side=1, at=c(ticks), labels=sort(unique(Flowcell)), tick=FALSE, las=2) mtext("Lane",side=3, cex=.75, line=1.5) mtext("Flowcell",side=1, cex=1.25, line=8) polygon(shader, ys, border="black", lty=0, col="gray") points(1:length(FP_CONFIDENT_MATCHING_SNPS), FP_CONFIDENT_MATCHING_SNPS, pch=4, col=colors) points(1:length(FP_CONFIDENT_MATCHING_SNPS), FP_CONFIDENT_CALLS, pch=3, col=colors) if(length(badsnps)>0){ legend("bottomright", legend=c("Confident calls at fingerprint sites by lane", "Confident matching calls at fingerprint sites by lane", "Confident calls in bad lanes", "Confident matching calls in bad lanes", "All Confident calls match fingerprint sites"), pch=c(4,3,4,3,8), col=c("Blue", "Blue", "Red", "Red", "Black" ), bg="White") mtext("Some problematic fingerprint sites", side=3) }else{ legend("bottomright", legend=c("Confident calls at fingerprint sites by lane", "Confident matching calls at fingerprint sites by lane", "All Confident calls match fingerprint sites"), pch=c(4, 3, 8), col="Blue", bg="White") } #plot some summary of FP stuff textplot("Some summary of Fingerprint problems will go here ", valign="top", family="sans") detach(lanetable) } snps_called<-function(lanetable, sample_sets){ attach(lanetable) #define layout for this page layout(matrix(c(1,1,2, 3, 4,4), ncol=2, nrow=3, byrow=TRUE), widths = c(3,1), heights=c(1, 3,2), respect=FALSE) #prep for title bar title=paste(sample_sets, ": SNPs Called by Lane", sep="") drop<-read.jpeg("adprdrop.jpg") #plot title bar par(mar=c(0,0,0,0)) plot(drop) text(100, 40, title, family="serif", adj=c(0,0), cex=3, col=gray(.25)) #prep for snp plot ticks<-c(match(unique(Flowcell), sort(Flowcell)) ) ys=rep(c(min(SNP_TOTAL_SNPS, na.rm=TRUE), max(SNP_TOTAL_SNPS, na.rm=TRUE)*1.04, max(SNP_TOTAL_SNPS, na.rm=TRUE)*1.04, min(SNP_TOTAL_SNPS, na.rm=TRUE), min(SNP_TOTAL_SNPS, na.rm=TRUE)), ceiling(length(ticks)/2)) shader<-ticks[c(rep(c(1,1,2,2,1), ceiling(length(ticks)/2))+sort(rep(seq(0, length(ticks),by=2), 5)))]-0.5 if((length(ticks)%%2 > 0)){ shader[(length(shader)-2):(length(shader)-1)]<-length(Flowcell)+0.5 } shader<-na.omit(shader) cols<-rep("blue", length(SNP_TOTAL_SNPS)) cols[which(SNP_TOTAL_SNPS %in% boxplot.stats(SNP_TOTAL_SNPS)$out)]<-"red" #plot snp plot par(ylog=TRUE, mar=c(10, 6, 4, 0)) plot(1:length(SNP_TOTAL_SNPS), SNP_TOTAL_SNPS[order(Flowcell)],xlab="", ylab="SNPs Called", ylim = c(min(SNP_TOTAL_SNPS, na.rm=TRUE), max(SNP_TOTAL_SNPS, na.rm=TRUE)), xaxt="n", pch=NA) title(main="SNPs Called in Each Lane sorted by Flowcell", line=3, cex=1.5) axis(side=3, at=c(1:length(Flowcell)), labels=Lane[order(Flowcell)], cex.axis=0.5, padj=1,tick=FALSE) axis(side=1, at=c(ticks), labels=sort(unique(Flowcell)), tick=FALSE, las=2) mtext("Lane",side=3, cex=.75, line=1.5) mtext("Flowcell",side=1, cex=1.25, line=8) polygon(shader, ys, border="black", lty=0, col="gray") points(1:length(SNP_TOTAL_SNPS), SNP_TOTAL_SNPS, col=cols, pch=19) if(length(boxplot.stats(SNP_TOTAL_SNPS)$out)>0){ legend("topright", legend=c("Normal SNP Call Counts", "Outlier SNP Call Counts"), pch=19, col=c("Blue", "red"), bg="White") } #plot boxplot par(ylog=TRUE, mar=c(10, 0, 4, 2)) boxplot(SNP_TOTAL_SNPS, main="SNPs Called in Lane", ylab="", yaxt="n", ylim = c(min(SNP_TOTAL_SNPS, na.rm=TRUE), max(SNP_TOTAL_SNPS, na.rm=TRUE)), ylog=TRUE) if(length(boxplot.stats(SNP_TOTAL_SNPS)$out)==0){ mtext("No outliers", side=1, line=4) }else{ mtext(paste("Outlier SNP call counts in ", length(boxplot.stats(SNP_TOTAL_SNPS)$out), "lanes"), side=1, line=4) } #Plot variant summary below textplot("Variant Summary will go here", valign="top", family="sans") detach(lanetable) } titvsamp<-function(metricsbysamp){ attach(titv) #define layout layout(matrix(c(1,2,3), ncol=1, nrow=3, byrow=TRUE), heights=c(1, 3,2), respect=FALSE) #prep for title bar title=paste(sample_sets, ": Ti/Tv Ratio by Sample", sep="") drop<-read.jpeg("adprdrop.jpg") #plot title bar par(mar=c(0,0,0,0)) plot(drop) text(100, 40, title, family="serif", adj=c(0,0), cex=3, col=gray(.25)) #prep for titv graph boxplot.stats(TiTvRatio[which(filter_name=="filtered")])$stats[5]->min shade<-which(sort(TiTvRatio[which(novelty_name=="novel" & filter_name=="called")], decreasing=TRUE)0.3) #this can be changed to any kind of filter for particular lanes colors<-rainbow(ncol(errpercycle), s=0.5, v=0.5) colors[crazies]<-rainbow(length(crazies)) weights<-rep(1, ncol(errpercycle)) weights[crazies]<-2 #plot erprp graph par(mar=c(6, 6, 3, 2)) matplot(errpercycle, type="l", lty="solid", col=colors, lwd=weights, main="Error Rate per Read Position", ylab="Error Rate", xlab="Cycle/Read Position", log="y", cex.main=2, cex.lab=1.5, cex.axis=1.25, ) if(length(crazies)>0){ legend("topleft", title="Unusual Lanes", legend=colnames(errpercycle)[crazies], lty="solid", lwd=2, col=colors[crazies], xjust=0.5) }else{ mtext("No unusual lanes.", 1, line=6, cex=1.25) } #Plot variant summary below textplot("Something related will go here", valign="top", family="sans") } depth_target<-function(DOC){ #define layout layout(matrix(c(1,2), ncol=1, nrow=2, byrow=TRUE), heights=c(1, 5), respect=FALSE) #prep for title bar title=paste(sample_sets, ": Depth of Coverage By Target", sep="") drop<-read.jpeg("adprdrop.jpg") #plot title bar par(mar=c(0,0,0,0)) plot(drop) text(100, 40, title, family="serif", adj=c(0,0), cex=1.75, col=gray(.25)) colnames(DOC)->cols apply(DOC[,grep("mean", cols)], 1, median)->medianofmeans apply(DOC[,grep("mean", cols)], 1, quantile, probs=3/4)->q3s apply(DOC[,grep("mean", cols)], 1, quantile, probs=1/4)->q1s par(ylog=FALSE, mar=c(5, 5, 4, 2)) plot(c(1:3122),sort(medianofmeans, decreasing=TRUE), type="l",log="y",ylab="Coverage", xlab="",xaxt="n", main="Coverage Across All Targets", lwd=2, cex.main=2.5, cex.lab=1.5, cex.axis=1.25) mtext("Targets sorted by median avereage coverage across sample", side=1, line=1, cex=1.5) abline(h=10, lty="dashed", lwd=3) lines(c(1:3122),q3s[order(medianofmeans, decreasing=TRUE)], col="dark blue") lines(c(1:3122),q1s[order(medianofmeans, decreasing=TRUE)], col="dark blue") legend(c(0, 20), legend="10x coverage", box.lty=0, lwd=3, lty="dashed") legend("bottomleft", legend=c("Median average target coverage across all samples", "First and third quartiles of average target across all sample"), box.lty=0, lwd=c(1,2), col=c("black", "dark blue"), lty="solid") #define layout layout(matrix(c(1,2), ncol=1, nrow=2, byrow=TRUE), heights=c(1,5), respect=FALSE) #prep for title bar title=paste(sample_sets, ": Depth of Coverage For Poorly Covered Targets", sep="") drop<-read.jpeg("adprdrop.jpg") #plot title bar par(mar=c(0,0,0,0)) plot(drop) text(100, 40, title, family="serif", adj=c(0,0), cex=1.25, col=gray(.25)) yuck<-DOC[which(medianofmeans<10),grep("mean", cols)] yuck<-yuck+0.1 par(mar=c(17, 4, 4, 2)) boxplot(t(yuck[order(medianofmeans[which(medianofmeans<10)], decreasing=TRUE),]),log="y", yaxt="n", xaxt="n", cex.lab=1.15, cex.axis=1.05, ylab="Average coverage accross all samples", main="Targets with low coverage accross samples") axis(2, at=axTicks(2)+c(0, rep(0.1, length(axTicks(2))-1)), labels=c(0.0, axTicks(2)[2:length(axTicks(2))]), cex.axis=0.75) mtext("Target", side=1, line=15, cex=1.5) axis(1, at=c(1:length(which(medianofmeans<10))), labels=rownames(DOC[which(medianofmeans<10),])[order(medianofmeans[which(medianofmeans<10)])], las=2, cex.axis=1.15) } depth_sample<-function(DOC2){ #define layout layout(matrix(c(1,2), ncol=1, nrow=2, byrow=TRUE), heights=c(1,5), respect=FALSE) #prep for title bar title=paste(sample_sets, ": Mean Depth of Coverage per Base by Sample", sep="") drop<-read.jpeg("adprdrop.jpg") #plot title bar par(mar=c(0,0,0,0)) plot(drop) text(100, 40, title, family="serif", adj=c(0,0), cex=1.25, col=gray(.25)) #prep for bysample means<-c(sort(DOC2[which(DOC2[,2]<250),2]), rep(250, (length(which(DOC2[,2]>=250))-1))) types<-rep(20, length(means)) cols<-rep("black", length(means)) types[which(means==250)]<-8 cols[which(means==250)]<-"red" #plot doc by sample par(mar=c(10, 4, 4, 2)) plot(means, ylim=c(0, 250), xaxt="n", col=cols, pch=types, xlab="", ylab="Depth of Coverage") > axis(1, at=c(1:(nrow(DOC2)-1)), labels=c(rownames(DOC2[which(DOC2[,2]<250),])[order(DOC2[which(DOC2[,2]<250),2])], rownames(DOC2[which(DOC2[,2]>=250),])[order(which(DOC2[,2]>=250))][1:(length(which(DOC2[,2]>=250))-1)]), las=2) > mtext("Samples", side=1, line=7, cex=1.25) } datapuller<-function(setname){ #library(yaml) strsplit(setname, ".")[1]->projectname lanes<-read.delim(paste(projectname, "_lanes.txt", sep=""), header=TRUE) samps<-read.delim(paste(projectname, "_samps.txt", sep=""), header=TRUE) #doct<-read.delim(paste(setname, "depth.sample_interval_summary", sep=""), header=TRUE, row.names=1) #docs<-read.delim(paste(setname, ".depth.sample_summary", sep=""), header=TRUE, row.names=1) #eval<-read.csv(paste(setname, "eval.CountFunctionalClasses", sep=""), skip=1) titv<-read.csv(paste(setname, ".eval.SimpleMetricsBySample.csv", sep=""), skip=1) #erprp<-read.delim(paste(setname, ".erprp", sep="")) colnames(lanes)<-c('Initiative','Project','GSSR.ID','External.ID','WR.ID','Flowcell','Lane','Lane.Type','Library','AL_TOTAL_READS','AL_PF_READS','AL_PCT_PF_READS','AL_PF_NOISE_READS','AL_PF_READS_ALIGNED','AL_PCT_PF_READS_ALIGNED','AL_PF_HQ_ALIGNED_READS','AL_PF_HQ_ALIGNED_BASES','AL_PF_HQ_ALIGNED_Q20_BASES','AL_PF_HQ_MEDIAN_MISMATCHES','AL_MEAN_READ_LENGTH','AL_READS_ALIGNED_IN_PAIRS','AL_PCT_READS_ALIGNED_IN_PAIRS','AL_BAD_CYCLES','AL_PCT_STRAND_BALANCE','DUP_UNPAIRED_READS_EXAMINED','DUP_READ_PAIRS_EXAMINED','DUP_UNMAPPED_READS','DUP_UNPAIRED_READ_DUPLICATES','DUP_READ_PAIR_DUPLICATES','DUP_PERCENT_DUPLICATION','DUP_ESTIMATED_LIBRARY_SIZE','HS_BAIT_SET','HS_GENOME_SIZE','HS_LIBRARY_SIZE','HS_BAIT_TERRITORY','HS_TARGET_TERRITORY','HS_BAIT_DESIGN_EFFICIENCY','HS_TOTAL_READS','HS_PF_READS','HS_PF_UNIQUE_READS','HS_PCT_PF_READS','HS_PCT_PF_UQ_READS','HS_PCT_PF_UQ_READS_ALIGNED','HS_PF_UQ_READS_ALIGNED','HS_PF_UQ_BASES_ALIGNED','HS_ON_BAIT_BASES','HS_NEAR_BAIT_BASES','HS_OFF_BAIT_BASES','HS_ON_TARGET_BASES','HS_PCT_SELECTED_BASES','HS_PCT_OFF_BAIT','HS_ON_BAIT_VS_SELECTED','HS_MEAN_BAIT_COVERAGE','HS_MEAN_TARGET_COVERAGE','HS_FOLD_ENRICHMENT','HS_ZERO_CVG_TARGETS_PCT','HS_FOLD_80_BASE_PENALTY','HS_PCT_TARGET_BASES_2X','HS_PCT_TARGET_BASES_10X','HS_PCT_TARGET_BASES_20X','HS_PCT_TARGET_BASES_30X','HS_PENALTY_10X','HS_PENALTY_20X','HS_PENALTY_30X','SNP_TOTAL_SNPS','SNP_PCT_DBSNP','SNP_NUM_IN_DBSNP','Lane.IC.Matches','Lane.IC.PCT.Mean.RD1.Err.Rate','Lane.IC.PCT.Mean.RD2.Err.Rate','FP_PANEL_NAME','FP_PANEL_SNPS','FP_CONFIDENT_CALLS','FP_CONFIDENT_MATCHING_SNPS','FP_CONFIDENT_CALLED_PCT','FP_CONFIDENT_MATCHING_SNPS_PCT','LPCNCRD_REFERENCE','LPCNCRD_NON_REFERENCE','LPCNCRD_PCT_CONCORDANCE') files<-list(c(lanes, samps, doct, docs, eval, titv, erprp)) return(files) } runner<-function(basename, desc1, desc2){ datapuller(basename)->tables attach(tables) pdf(paste(basename, ".pdf", sep=""), width=22, height=15,pointsize=24) tearsheet(lanes, samps, titv, desc1, desc1) fingerprints(lanes) snps_called(lanes) titvsamp(titv) #functionalclasses(eval) #errorratepercycle(erprp) #depth_target(doct) #depth_sample(docs) dev.off() detach(tables) } if(length(commandArgs(TRUE))>0){ runner(commandArgs(TRUE)) }