diff --git a/R/exomePostQC.R b/R/exomePostQC.R
new file mode 100644
index 000000000..5ee5287ee
--- /dev/null
+++ b/R/exomePostQC.R
@@ -0,0 +1,271 @@
+library("gsalib", lib.loc="/Users/depristo/Desktop/broadLocal/GATK/trunk/R/")
+require("ggplot2")
+
+args = commandArgs(TRUE);
+
+onCMDLine = ! is.na(args[1])
+LOAD_DATA = F
+
+parseHighlightSamples <- function(s) {
+  return(unlist(strsplit(s, ",", fixed=T)))
+}
+
+if ( onCMDLine ) {
+  ProjectName = args[1]
+  VariantEvalRoot = args[2]
+  preQCFile = args[3]
+  outputPDF = args[4]
+  if ( ! is.na(args[5]) ) 
+    highlightSamples = parseHighlightSamples(args[5])
+  else
+    highlightSamples = c()
+} else {
+  ProjectName = "InDevelopmentInR"
+  preQCFile <- "~/Desktop/broadLocal/GATK/trunk/qcTestData/GoT2D_exomes_batch_005.cleaned.snps_and_indels.filtered.annotated.preQC.tsv"
+  VariantEvalRoot <- "~/Desktop/broadLocal/GATK/trunk/qcTestData/GoT2D_exomes_batch_005.cleaned.snps_and_indels.filtered.annotated"
+  outputPDF = "bar.pdf"
+  highlightSamples = c() # parseHighlightSamples("29029,47243")
+}
+
+expandVEReport <- function(d) {
+  d$TiTvVariantEvaluator$tiTvRatio = round(d$TiTvVariantEvaluator$tiTvRatio,2) 
+  d$CountVariants$deletionInsertionRatio = round(d$CountVariants$deletionInsertionRatio,2) 
+  d$CountVariants$nIndels = d$CountVariants$nInsertions + d$CountVariants$nDeletions
+  return(d)
+}
+
+createMetricsBySites <- function(VariantEvalRoot, PreQCMetrics) {
+  # Metrics by sites:
+  #  bySite -> counts of SNPs and Indels by novelty, with expectations
+  #  byAF -> snps and indels (known / novel)
+  r = list( bySite = expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".summary.eval", sep=""))),
+               byAF = gsa.read.gatkreport(paste(VariantEvalRoot, ".byAF.eval", sep="")))
+  r$byAF$CountVariants$nIndels = r$byAF$CountVariants$nInsertions + r$byAF$CountVariants$nDeletions
+
+  nChrom = 1 / min(r$byAF$TiTvVariantEvaluator$AlleleFrequency[r$byAF$TiTvVariantEvaluator$AlleleFrequency > 0])
+  r$byAF$TiTvVariantEvaluator$nSNPs = r$byAF$TiTvVariantEvaluator$nTi + r$byAF$TiTvVariantEvaluator$nTv
+  r$byAF$CountVariants$AC = r$byAF$CountVariants$AlleleFrequency * nChrom
+  r$byAF$TiTvVariantEvaluator$AC = r$byAF$TiTvVariantEvaluator$AlleleFrequency * nChrom
+  return(r)
+}
+
+summaryTable <- function(metricsBySites, metricsBySample) {
+  # SNP summary statistics
+  merged = merge(metricsBySites$bySite$CountVariants, metricsBySites$bySite$TiTvVariantEvaluator)
+  sub <- subset(merged, FunctionalClass=="all")
+  raw = melt(sub, id.vars=c("Novelty"), measure.vars=c("nProcessedLoci", "nSNPs", "tiTvRatio", "nIndels", "deletionInsertionRatio"))
+  table = cast(raw, Novelty ~ ...)
+  # doesn't work with textplot
+  colnames(table) <- c("Novelty", "Target size (bp)", "No. SNPs", "Ti/Tv", "No. Indels", "deletion/insertion ratio")
+  return(table)
+}
+
+summaryPlots <- function(metricsBySites) {
+  name = "SNP and Indel count by novelty and allele frequency" 
+  molten = melt(subset(metricsBySites$byAF$CountVariants, FunctionalClass == "all" & Novelty != "all" & AC > 0), id.vars=c("Novelty", "AC"), measure.vars=c(c("nSNPs", "nIndels")))
+  p <- ggplot(data=molten, aes(x=AC, y=value+1, color=Novelty, fill=Novelty), group=variable)
+  p <- p + opts(title = name)
+  p <- p + scale_y_log10("Number of variants")
+  p <- p + geom_area()
+  p <- p + facet_grid(variable ~ ., scales="free")
+  p <- p + scale_x_continuous("Allele count (AC)")
+  print(p)
+  p <- p + scale_x_log10("Allele count (AC)")
+  print(p)
+
+  # Counts vs. Allele frequency 
+  name = "Variant counts by allele count"
+  for ( measure in c("nSNPs", "nIndels")) {
+    molten = melt(subset(metricsBySites$byAF$CountVariants, FunctionalClass == "all" & AC > 0), id.vars=c("Novelty", "AC"), measure.vars=c(measure))
+    p <- ggplot(data=molten, aes(x=AC, y=value+1, color=Novelty), group=variable)
+    p <- p + opts(title = paste(name, ":", measure))
+    p <- p + scale_y_log10("Number of variants")
+    p <- p + scale_x_log10("Allele count (AC)")
+    p <- p + geom_point(alpha=0.5, size=4)
+    p <- p + geom_smooth(aes(weight=value), size=1, method="lm", formula = y ~ x)
+    p <- p + facet_grid(Novelty ~ ., scales="free")
+    print(p)
+  }
+  
+  name = "Transition / transversion ratio by allele count"
+  byAFNoAll = subset(metricsBySites$byAF$TiTvVariantEvaluator, Novelty != "all"  & FunctionalClass == "all" & AC > 0)
+  p <- ggplot(data=byAFNoAll, aes(x=AC, y=tiTvRatio, color=Novelty))
+  p <- p + scale_y_continuous("Transition / transversion ratio", limits=c(0,4))
+  p <- p + opts(title = name)
+  p <- p + geom_smooth(size=2)
+  p <- p + geom_point(aes(size=log10(nSNPs), weight=nSNPs), alpha=0.5)
+  p <- p + scale_x_continuous("Allele count (AC)")
+  print(p)
+  p <- p + scale_x_log10("Allele count (AC)")
+  print(p)
+  
+  # SNPs to indels ratio by allele frequency
+  name = "SNPs to indels ratio by allele frequency" 
+  metricsBySites$byAF$CountVariants$SNP.Indel.Ratio = metricsBySites$byAF$CountVariants$nSNPs / metricsBySites$byAF$CountVariants$nIndels
+  metricsBySites$byAF$CountVariants$SNP.Indel.Ratio[metricsBySites$byAF$CountVariants$nIndels == 0] = NaN
+  p <- ggplot(data=subset(metricsBySites$byAF$CountVariants, FunctionalClass == "all" & Novelty == "all"), aes(x=AC, y=SNP.Indel.Ratio))
+  p <- p + opts(title = name)
+  p <- p + scale_y_continuous("SNP to indel ratio")
+  #p <- p + scale_y_log10()
+  p <- p + geom_point(alpha=0.5, aes(size=log10(nIndels)))
+  p <- p + geom_smooth(size=2, aes(weight=nIndels))
+  print(p)
+  
+  name = "SNP counts by functional class" 
+  molten = melt(subset(metricsBySites$bySite$CountVariants, Novelty != "all" & FunctionalClass != "all"), id.vars=c("Novelty", "FunctionalClass"), measure.vars=c(c("nSNPs")))
+  p <- ggplot(data=molten, aes(x=FunctionalClass, y=value, fill=Novelty), group=FunctionalClass)
+  p <- p + opts(title = name)
+  p <- p + scale_y_log10("No. of SNPs")
+  p <- p + geom_bar(position="dodge")
+  print(p)
+}
+
+addSection <- function(name) {
+    par("mar", c(5, 4, 4, 2))
+    frame()
+    title(name, cex=2)
+}
+ 
+createMetricsBySamples <- function(VariantEvalRoot) {
+  byAFEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
+  #preQCMetrics <- read.table(preQCFile, header=T) 
+  r = merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants)
+
+  # order the samples by nSNPs -- it's the natural ordering.
+  x = subset(r, Novelty=="all")
+  r$Sample <- factor(x$Sample, levels=x$Sample[order(x$nSNPs)])
+
+  # add highlight info
+  r$highlight = r$Sample %in% highlightSamples
+
+  #r = merge(merge(preQCMetrics, byAFEval$TiTvVariantEvaluator), byAFEval$CountVariants)
+  return(subset(r, Sample != "all"))
+}
+
+if ( onCMDLine || LOAD_DATA ) {
+  metricsBySites <- createMetricsBySites(VariantEvalRoot)
+  metricsBySamples <- createMetricsBySamples(VariantEvalRoot)
+}
+
+# -------------------------------------------------------
+# Per sample plots
+# -------------------------------------------------------
+
+perSamplePlots <- function(metricsBySamples) {
+  metricsBySamples$highlightTextSizes = c(1,2)[metricsBySamples$highlight+1]
+  sampleTextLabel <- geom_text(aes(label=Sample, size=highlightTextSizes)) 
+  sampleTextLabelScale <- scale_size("Highlighted samples", to=c(3,5), breaks=c(1,2), labels=c("regular", "highlighted"))
+  xAxis <- scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
+  myRug <- geom_rug(position=jitter)
+  #myRug <- geom_rug(aes(x = NULL))
+
+  measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "nInsertions", "nDeletions", "deletionInsertionRatio")
+  # Counts vs. Allele frequency 
+  name = "Variant counts by allele count"
+  for ( measure in measures ) {
+    molten = melt(metricsBySamples, id.vars=c("Novelty", "Sample", "highlightTextSizes"), measure.vars=c(measure))
+    p <- ggplot(data=molten, aes(x=Sample, y=value, group=Novelty, color=Novelty), y=value)
+    p <- p + opts(title = paste(name, ":", measure))
+    p <- p + geom_smooth(alpha=0.5, aes(group=Novelty))
+    p <- p + sampleTextLabel + sampleTextLabelScale
+    p <- p + myRug
+    p <- p + facet_grid(Novelty ~ ., scales="free")
+    # how do we remove the labels?
+    p <- p + xAxis
+    print(p)
+  }
+  
+  for ( novelty in c("all", "known", "novel") ) {
+    # TODO -- how can I color it as before?
+    # TODO -- add marginal distributions?
+    molten = melt(subset(metricsBySamples, Novelty==novelty), id.vars=c("Sample", "highlightTextSizes"), measure.vars=measures)
+    p <- ggplot(data=molten, aes(x=Sample, y=value))
+    p <- p + opts(title = paste(name, ":", novelty))
+#    p <- p + scale_y_log10("Number of variants")
+#    p <- p + geom_point(alpha=0.5, size=4)
+    p <- p + sampleTextLabel + sampleTextLabelScale
+    p <- p + myRug
+    p <- p + facet_grid(variable ~ ., scales="free")
+    # how do we remove the labels?
+    p <- p + xAxis
+print(p)
+  }
+}
+
+# -------------------------------------------------------
+# Actually invoke the above plotting functions 
+# -------------------------------------------------------
+
+if ( ! is.na(outputPDF) ) {
+  pdf(outputPDF, height=8.5, width=11)
+} 
+
+# Table of overall counts and quality
+textplot(as.data.frame(summaryTable(metricsBySites)), show.rownames=F)
+title(paste("Overall summary metrics for project", ProjectName), cex=3)
+ 
+summaryPlots(metricsBySites)
+perSamplePlots(metricsBySamples)
+
+if ( ! is.na(outputPDF) ) {
+  dev.off()
+} 
+
+
+# countVariants = subset(as.data.frame(d$CountVariants), Filter=="called" & Novelty != "all" & Sample != "all")
+# countVariantsNoNovelty = subset(as.data.frame(d$CountVariants), Filter=="called" & Novelty == "all" & Sample != "all")
+# titv = subset(as.data.frame(d$TiTvVariantEvaluator), Filter=="called" & Novelty != "all" & Sample != "all")
+# titv$count = titv$nTi + titv$nTv
+# 
+# print(qplot(data=titv, Sample, tiTvRatio, group=Novelty, color=Novelty, facets = FunctionalClass ~ Novelty, size=log10(count))
+#  + geom_point()
+#  + geom_smooth(weight=count))
+# 
+# print(qplot(data=titv, Sample, tiTvRatio, group=Novelty, color=Novelty, facets = FunctionalClass ~ Novelty, size=log10(count))
+# + geom_point()
+# + geom_smooth(weight=count, level=0.99, method="lm"))
+# 
+# print(qplot(data=countVariants, Sample, nInsertions, group=Novelty, color=Novelty, facets = FunctionalClass ~ Novelty, size=log10(nVariantLoci))
+# + geom_point()
+# + geom_smooth(weight=count))
+# 
+# # moltenCountVariants = melt(subset(countVariants, Filter == "called" & FunctionalClass == "all"), id.vars=c("Sample", "Novelty"), 
+# #   measure.vars=c("hetHomRatio", "deletionInsertionRatio"))
+# #   
+# # qplot(data=subset(moltenCountVariants), Sample, value, facets = Novelty ~ variable, log="", color=Novelty)
+# 
+# # TODO -- really could use nIndels, not just nInsertions and nDeletions, as CountVariants output
+# 
+# plotByNoveltyAndSample <- function(data, measures) {
+#   sub = subset(data, Filter == "called" & FunctionalClass == "all")
+#   molten = melt(sub, id.vars=c("Sample", "Novelty"), measure.vars=measures)
+#   print(qplot(data=molten, Sample, value, color=Novelty) + facet_grid( variable ~ ., scales="free"))
+# }
+# 
+# plotBySample <- function(data, measures) {
+#   sub = subset(data, Filter == "called" & FunctionalClass == "all" & Novelty == "all")
+#   molten = melt(sub, id.vars=c("Sample"), measure.vars=measures)
+#   print(summary(molten))
+#   print(head(molten))
+#   print(qplot(data=molten, Sample, value) + facet_grid( variable ~ ., scales="free"))
+#   molten
+# }
+# 
+# scatterCounts <- function(data, measures) {
+#   sub = subset(data, Filter == "called" & FunctionalClass == "all")
+#   for ( measure in measures ) {
+#       print(measure)
+#       raw = melt(sub, id.vars=c("Sample", "Novelty"), measure.vars=c(measure))
+#       xy = cast(raw, Sample ~ Novelty)
+#       print(qplot(data=xy, known, novel, geom="blank", main=measure) + geom_text(aes(label=Sample)))
+#   }
+# }
+# 
+# variantMeasures = c("nSNPs", "nInsertions", "nDeletions", "nSingletons")
+# genotypingMeasures = c("nHomVar", "nHets", "nNoCalls")
+# 
+# plotByNoveltyAndSample(countVariants, variantMeasures)
+# scatterCounts(countVariants, variantMeasures)
+# 
+# plotBySample(countVariantsNoNovelty, genotypingMeasures)
+
diff --git a/scala/qscript/oneoffs/depristo/ExomePostQCEval.scala b/scala/qscript/oneoffs/depristo/ExomePostQCEval.scala
new file mode 100755
index 000000000..fa02f82b3
--- /dev/null
+++ b/scala/qscript/oneoffs/depristo/ExomePostQCEval.scala
@@ -0,0 +1,67 @@
+package oneoffs.depristo
+
+import org.broadinstitute.sting.queue.QScript
+import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction
+import org.broadinstitute.sting.queue.extensions.gatk._
+import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
+
+class ExomePostQCEval extends QScript {
+  @Argument(doc="gatkJarFile", required=false)
+  var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar")
+
+  @Argument(shortName = "R", doc="ref", required=false)
+  var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
+
+  @Argument(shortName = "eval", doc="ref", required=true)
+  var evalVCF: File = _
+
+  @Argument(shortName = "intervals", doc="intervals", required=true)
+  val myIntervals: String = null;
+
+  @Argument(shortName = "dbSNP", doc="dbSNP", required=false)
+  val dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/bundle/current/b37/dbsnp_132.b37.vcf")
+
+  trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
+    this.logging_level = "INFO";
+    this.jarFile = gatkJarFile;
+    this.reference_sequence = referenceFile;
+    this.memoryLimit = 4
+  }
+
+  // TODO -- should include "standard" eval for plotting expectations
+
+  def script = {
+    // The basic summary eval
+    createEval(".summary",
+               List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
+               List("EvalRod", "CompRod", "Novelty", "FunctionalClass"))
+
+    // The basic summary eval, by AF
+    createEval(".byAF",
+               List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
+               List("EvalRod", "CompRod", "Novelty", "AlleleFrequency", "FunctionalClass"))
+
+    // By sample
+    createEval(".bySample",
+               List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
+               List("EvalRod", "CompRod", "Novelty", "Sample"))
+  }
+
+  def createEval(prefix: String, evalModules: List[String], extraStrats: List[String]) {
+    val eval = new Eval(evalVCF)
+    eval.out = swapExt(evalVCF,".vcf", prefix + ".eval")
+    eval.evalModule = evalModules
+    eval.stratificationModule = List("EvalRod", "CompRod", "Novelty") ::: extraStrats
+    add(eval)
+  }
+
+  class Eval(@Input vcf: File) extends VariantEval with UNIVERSAL_GATK_ARGS {
+    this.rodBind :+= RodBind("eval", "VCF", evalVCF)
+    if ( dbSNP.exists() )
+      this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
+    this.doNotUseAllStandardStratifications = true
+    this.doNotUseAllStandardModules = true
+    this.intervalsString = List(myIntervals);
+  }
+}
+