Beautiful labels, better choice of dimension ranges. Supports fast loading of just first N records for testing.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3964 348d0f76-0448-11de-a6fe-93d51630548a
2010-08-06 23:17:32 +00:00 · 2010-08-06 23:17:32 +00:00 · b63d64bbbc
parent d3bebe0f2c
commit b63d64bbbc
1 changed files with 39 additions and 11 deletions
--- a/R/VariantRecalibratorReport/VariantRecalibratorReport.R
+++ b/R/VariantRecalibratorReport/VariantRecalibratorReport.R
@ -20,6 +20,10 @@ maxVariants = args[5];
 if (is.na(maxVariants)) { maxVariants = -1; }
 maxVariants = as.integer(maxVariants)
 greedy = args[6]
 if (is.na(greedy)) { greedy = -1; }
 greedy = as.integer(greedy)
 getAnnIndex <- function(d, ann) {
    index = -1;
    for (i in c(1:length(names(d)))) {
@ -111,6 +115,32 @@ read.clusters <- function(filename) {
    clusters;
 }
 clusterLimits <- function( vals, defaultMin, defaultMax ) {
    x = c(max(defaultMin, min(vals, -2)), min(defaultMax, max(vals, 2)))
    print(x)
    x
 }
 makeAxis <- function( num, vals, off1, mult1, xmin, xmax ) {
    #labels=as.integer(seq(from=min(vals), to=max(vals), by=(abs(min(vals)) + abs(max(vals)))/5))
    #at=seq(from=min((vals - off1)/mult1), to=max((vals - off1)/mult1), by=(abs(min((vals - off1)/mult1)) + abs(max((vals - off1)/mult1)))/5)
    #from = xmin * mult1 + off1
    #to = xmax * mult1 + off1
    #print(list(off1=off1, mult1=mult1, xmin=xmin, xmax=xmax))
    at = as.integer(seq(from=xmin, to=xmax, by=(abs(xmin) + abs(xmax))/5))
    labels = as.integer(at * mult1 + off1)
    #print(list(from=from, to=to, by=(abs(from) + abs(to))/5))
    #print(list(labels=labels, at=at))
    axis(num, labels=labels, at=at);
 #    axis(num,
 #        labels=as.integer(seq(from=min(vals), to=max(vals), by=(abs(min(vals)) + abs(max(vals)))/5)),
 #        at=seq(from=min((vals - off1)/mult1), to=max((vals - off1)/mult1), by=(abs(min((vals - off1)/mult1)) + abs(max((vals - off1)/mult1)))/5)
 #    );
 }
 plotClusters <- function(d.known, d.novel, d.loci, c, ann1, ann2, filename, maxVariants = -1) {
    index1 = getAnnIndex(d.known, ann1);
    index2 = getAnnIndex(d.known, ann2);
@ -125,8 +155,11 @@ plotClusters <- function(d.known, d.novel, d.loci, c, ann1, ann2, filename, maxV
    off2 = c[[1]]$conversions[[cindex2]]$offset;
    # todo -- should only include min/max actually observed if bounds are bigger than actual distributions
-    xvalsForLims = c(-5, 5)
+    xvalsForLims = clusterLimits(d.known[,index1], -4, 4)
-    yvalsForLims = c(-5, 5)
+    yvalsForLims = clusterLimits(d.known[,index2], -4, 4)
    #d.known = d.known[(d.known[,index1]-off1)/mult1 >= xvalsForLims[1] & (d.known[,index1]-off1)/mult1 <= xvalsForLims[2] & (d.known[,index2]-off2)/mult2 >= yvalsForLims[1] & (d.known[,index2]-off2)/mult2 <= yvalsForLims[2],]
    #xvalsForLims = (d.known[,index1] - off1)/mult1
    #yvalsForLims = (d.known[,index2] - off2)/mult2
    xlims = c(min(xvalsForLims), 1.2*max(xvalsForLims)); 
@ -177,14 +210,8 @@ plotClusters <- function(d.known, d.novel, d.loci, c, ann1, ann2, filename, maxV
        points(ellipse(t(cov), centre=mu), type="l", lwd=lineweight, col=color);
    }
-    axis(1,
+    makeAxis(1, d.novel[,index1], off1, mult1, xvalsForLims[1], xvalsForLims[2]) 
-        labels=as.integer(seq(from=min(d.novel[,index1]), to=max(d.novel[,index1]), by=(abs(min(d.novel[,index1])) + abs(max(d.novel[,index1])))/5)),
+    makeAxis(2, d.novel[,index2], off2, mult2, yvalsForLims[1], yvalsForLims[2]) 
        at=seq(from=min((d.novel[,index1] - off1)/mult1), to=max((d.novel[,index1] - off1)/mult1), by=(abs(min((d.novel[,index1] - off1)/mult1)) + abs(max((d.novel[,index1] - off1)/mult1)))/5)
    );
    axis(2,
        labels=as.integer(seq(from=min(d.novel[,index2]), to=max(d.novel[,index2]), by=(abs(min(d.novel[,index2])) + abs(max(d.novel[,index2])))/5)),
        at=seq(from=min((d.novel[,index2] - off2)/mult2), to=max((d.novel[,index2] - off2)/mult2), by=(abs(min((d.novel[,index2] - off2)/mult2)) + abs(max((d.novel[,index2] - off2)/mult2)))/5)
    );
    if (!is.na(d.loci)) {
        legend("bottomleft", c("Known", "Novel", "Suspicious loci"), col=c("blue", "red", "yellow3"), pch=19);
@ -224,7 +251,8 @@ if (!is.na(lociFile)) {
    l = t$POS;
 }
-d = read.table(vcfTable, header=TRUE);
+print("Greedy reading")
 d = read.table(vcfTable, header=TRUE, nrows = greedy);
 c = read.clusters(clusterFile);
 d.known = d[which(d$DB == 1),];