From 3d27e5eb98eb0b31c9d56053a70626cf5a11f6cf Mon Sep 17 00:00:00 2001 From: droazen Date: Wed, 22 Jun 2011 22:55:53 +0000 Subject: [PATCH] Default operating parameters in addition to the parameterized Rscript version. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6067 348d0f76-0448-11de-a6fe-93d51630548a --- R/exomePreQC.R | 64 +++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/R/exomePreQC.R b/R/exomePreQC.R index a04a998e9..8d04ea99c 100644 --- a/R/exomePreQC.R +++ b/R/exomePreQC.R @@ -2,8 +2,13 @@ args = commandArgs(TRUE) onCMDLine = ! is.na(args[1]) if ( onCMDLine ) { - inputTSV = args[1] - outputPDF = args[2] + reference_dataset = '/Users/mhanna/metrics.perSample.formatted.table' + inputTSV = args[2] + outputPDF = args[3] +} else { + reference_dataset = '/Users/mhanna/metrics.perSample.formatted.table' + inputTSV = 'GoT2D_exomes_batch_005.tsv' + outputPDF = 'T2D.pdf' } require('ggplot2') @@ -11,35 +16,40 @@ require('ggplot2') inputTSV = "GoT2D_exomes_batch_005.tsv" data <- read.table(inputTSV,header=T) -fingerprint_lods = list() -for(i in 1:nrow(data)) { - fingerprint_lods[[as.character(data$sample[i])]] <- eval(parse(text=data$FINGERPRINT_LODS[i])) -} - -fingerprint_lod_order = order(unlist(lapply(fingerprint_lods,median),use.names=F)) - -pdf(outputPDF) -boxplot(fingerprint_lods[fingerprint_lod_order],las=3,main='Fingerprint LOD Scores By Sample',xlab='Sample',ylab='LOD Score Distribution',cex.axis=0.65) - -complete <- read.table('/Users/mhanna/metrics.perSample.formatted.table',header=T) +complete <- read.table(reference_dataset,header=T) novel <- subset(complete,exon_intervals == "whole_exome_agilent_1.1_refseq_plus_3_boosters"&Novelty=="novel"&FunctionalClass=="all") selected_samples <- novel$Sample %in% data$sample novel_with_highlights <- cbind(novel,selected_samples) -qplot(Sample,Selected_Bases_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='On+Near Bait Bases/PF Bases Aligned per Sample') -qplot(Sample,Mean_Target_Coverage,data=novel_with_highlights,color=selected_samples) + opts(title='Mean Target Coverage per Sample') -qplot(Sample,Zero_Coverage_Targets_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% of Targets with <2x Coverage per Sample') -qplot(Sample,Fold_80_Base_Penalty,data=novel_with_highlights,color=selected_samples) + opts(title='Fold 80 Base Penalty per Sample') -qplot(Sample,Target_Bases_20x_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% Target Bases Achieving >20x Coverage per Sample') -qplot(Sample,PF_Reads_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% PF Reads Aligned per Sample') -qplot(Sample,PF_HQ_Error_Rate,data=novel_with_highlights,color=selected_samples) + opts(title='% HQ Bases mismatching the Reference per Sample') -qplot(Sample,Mean_Read_Length,data=novel_with_highlights,color=selected_samples) + opts(title='Median Read Length per Sample') -qplot(Sample,Bad_Cycles,data=novel_with_highlights,color=selected_samples) + opts(title='# Bad Cycles per Sample') -qplot(Sample,Strand_Balance_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% PF Reads Aligned to the + Strand per Sample') -qplot(Sample,Total_SNPs,data=novel_with_highlights,color=selected_samples) + opts(title='# SNPs called per Sample') -qplot(Sample,dbSNP_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% SNPs in dbSNP per Sample') -qplot(PCT_DBSNP,data=data,geom="histogram") + opts(title='% SNPs in dbSNP per Sample') -dev.off() +if(onCMDLine) { + fingerprint_lods = list() + for(i in 1:nrow(data)) { + fingerprint_lods[[as.character(data$sample[i])]] <- eval(parse(text=data$FINGERPRINT_LODS[i])) + } + + fingerprint_lod_order = order(unlist(lapply(fingerprint_lods,median),use.names=F)) + + pdf(outputPDF) + boxplot(fingerprint_lods[fingerprint_lod_order],las=3,main='Fingerprint LOD Scores By Sample',xlab='Sample',ylab='LOD Score Distribution',cex.axis=0.65) + + qplot(Sample,Selected_Bases_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='On+Near Bait Bases/PF Bases Aligned per Sample') + qplot(Sample,Mean_Target_Coverage,data=novel_with_highlights,color=selected_samples) + opts(title='Mean Target Coverage per Sample') + qplot(Sample,Zero_Coverage_Targets_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% of Targets with <2x Coverage per Sample') + qplot(Sample,Fold_80_Base_Penalty,data=novel_with_highlights,color=selected_samples) + opts(title='Fold 80 Base Penalty per Sample') + qplot(Sample,Target_Bases_20x_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% Target Bases Achieving >20x Coverage per Sample') + qplot(Sample,PF_Reads_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% PF Reads Aligned per Sample') + qplot(Sample,PF_HQ_Error_Rate,data=novel_with_highlights,color=selected_samples) + opts(title='% HQ Bases mismatching the Reference per Sample') + qplot(Sample,Mean_Read_Length,data=novel_with_highlights,color=selected_samples) + opts(title='Median Read Length per Sample') + qplot(Sample,Bad_Cycles,data=novel_with_highlights,color=selected_samples) + opts(title='# Bad Cycles per Sample') + qplot(Sample,Strand_Balance_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% PF Reads Aligned to the + Strand per Sample') + qplot(Sample,Total_SNPs,data=novel_with_highlights,color=selected_samples) + opts(title='# SNPs called per Sample') + qplot(Sample,dbSNP_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% SNPs in dbSNP per Sample') + qplot(PCT_DBSNP,data=data,geom="histogram") + opts(title='% SNPs in dbSNP per Sample') + dev.off() +} else { + print('Plotting command-line arguments') + qplot(Sample,PF_Reads_Pct,data=novel_with_highlights,color=selected_samples) + opts(title='% PF Reads Aligned per Sample') +} #qplot(Sample,Library_Size_HS,data=novel_with_highlights,color=selected_samples) + opts(title='Hybrid Sequencing Library Size per Sample') #qplot(Sample,MEDIAN_INSERT_SIZE,data=novel_with_highlights,color=selected_samples) + opts(title='Median Insert Size per Sample')