A few comments / todos.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6001 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-06-15 13:09:09 +00:00
parent d7430c23f8
commit ce4e8d2093
1 changed files with 8 additions and 58 deletions

View File

@ -2,10 +2,16 @@ library("gsalib", lib.loc="/Users/depristo/Desktop/broadLocal/GATK/trunk/R/")
require("ggplot2")
require("gplots")
# TODOs:
# Assumes you have indels in your call set. If not you will get errors
# Create pre/post calling sections
# Allow conditional use of the preQCFile (where it's not available)
args = commandArgs(TRUE)
onCMDLine = ! is.na(args[1])
LOAD_DATA = F
LOAD_DATA = T
# creates an array of c(sampleName1, ..., sampleNameN)
parseHighlightSamples <- function(s) {
return(unlist(strsplit(s, ",", fixed=T)))
}
@ -142,6 +148,7 @@ createMetricsBySamples <- function(VariantEvalRoot) {
return(subset(r, Sample != "all"))
}
# actually load the data.
if ( onCMDLine || LOAD_DATA ) {
metricsBySites <- createMetricsBySites(VariantEvalRoot)
metricsBySamples <- createMetricsBySamples(VariantEvalRoot)
@ -212,60 +219,3 @@ if ( ! is.na(outputPDF) ) {
}
# countVariants = subset(as.data.frame(d$CountVariants), Filter=="called" & Novelty != "all" & Sample != "all")
# countVariantsNoNovelty = subset(as.data.frame(d$CountVariants), Filter=="called" & Novelty == "all" & Sample != "all")
# titv = subset(as.data.frame(d$TiTvVariantEvaluator), Filter=="called" & Novelty != "all" & Sample != "all")
# titv$count = titv$nTi + titv$nTv
#
# print(qplot(data=titv, Sample, tiTvRatio, group=Novelty, color=Novelty, facets = FunctionalClass ~ Novelty, size=log10(count))
# + geom_point()
# + geom_smooth(weight=count))
#
# print(qplot(data=titv, Sample, tiTvRatio, group=Novelty, color=Novelty, facets = FunctionalClass ~ Novelty, size=log10(count))
# + geom_point()
# + geom_smooth(weight=count, level=0.99, method="lm"))
#
# print(qplot(data=countVariants, Sample, nInsertions, group=Novelty, color=Novelty, facets = FunctionalClass ~ Novelty, size=log10(nVariantLoci))
# + geom_point()
# + geom_smooth(weight=count))
#
# # moltenCountVariants = melt(subset(countVariants, Filter == "called" & FunctionalClass == "all"), id.vars=c("Sample", "Novelty"),
# # measure.vars=c("hetHomRatio", "deletionInsertionRatio"))
# #
# # qplot(data=subset(moltenCountVariants), Sample, value, facets = Novelty ~ variable, log="", color=Novelty)
#
# # TODO -- really could use nIndels, not just nInsertions and nDeletions, as CountVariants output
#
# plotByNoveltyAndSample <- function(data, measures) {
# sub = subset(data, Filter == "called" & FunctionalClass == "all")
# molten = melt(sub, id.vars=c("Sample", "Novelty"), measure.vars=measures)
# print(qplot(data=molten, Sample, value, color=Novelty) + facet_grid( variable ~ ., scales="free"))
# }
#
# plotBySample <- function(data, measures) {
# sub = subset(data, Filter == "called" & FunctionalClass == "all" & Novelty == "all")
# molten = melt(sub, id.vars=c("Sample"), measure.vars=measures)
# print(summary(molten))
# print(head(molten))
# print(qplot(data=molten, Sample, value) + facet_grid( variable ~ ., scales="free"))
# molten
# }
#
# scatterCounts <- function(data, measures) {
# sub = subset(data, Filter == "called" & FunctionalClass == "all")
# for ( measure in measures ) {
# print(measure)
# raw = melt(sub, id.vars=c("Sample", "Novelty"), measure.vars=c(measure))
# xy = cast(raw, Sample ~ Novelty)
# print(qplot(data=xy, known, novel, geom="blank", main=measure) + geom_text(aes(label=Sample)))
# }
# }
#
# variantMeasures = c("nSNPs", "nInsertions", "nDeletions", "nSingletons")
# genotypingMeasures = c("nHomVar", "nHets", "nNoCalls")
#
# plotByNoveltyAndSample(countVariants, variantMeasures)
# scatterCounts(countVariants, variantMeasures)
#
# plotBySample(countVariantsNoNovelty, genotypingMeasures)