--output_dir has been changed to --output_prefix to give the user more control over the names of the resulting mass of files in AnalyzeAnnotations. The fontsize of the axes is increased. Cumulative filtering plots are removed since the binned filtering plots are much more useful.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2700 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-27 04:50:54 +00:00
parent df112e64b8
commit 4bcdab580c
4 changed files with 21 additions and 111 deletions

View File

@ -4,9 +4,8 @@ args <- commandArgs(TRUE)
verbose = TRUE
input = args[1]
outputDir = args[2]
annotationName = args[3]
minBinCutoff = as.numeric(args[4])
annotationName = args[2]
minBinCutoff = as.numeric(args[3])
c <- read.table(input, header=T)
@ -25,10 +24,10 @@ ymax = max(d$titv)
xmin = min(d$value)
xmax = max(d$value)
outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="")
outfile = paste(input, ".TiTv.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
ma = all[all$value > m,]
@ -43,10 +42,10 @@ points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
dev.off()
outfile = paste(outputDir, "binnedTiTv_log.", annotationName, ".pdf", sep="")
outfile = paste(input, ".TiTv_log.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
abline(v=m,lty=2)
abline(v=m75,lty=2)
@ -57,9 +56,9 @@ legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(2
dev.off()
outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="")
outfile = paste(input, "TiTv_hist.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h",xaxt="n");
plot(all$value,all$numVariants,xlab=annotationName,ylab="num variants in bin",type="h",xaxt="n",ps=14);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
dev.off()

View File

@ -1,30 +0,0 @@
#!/broad/tools/apps/R-2.6.0/bin/Rscript
#
# Plot the cumulative Ti/Tv ratio as a function of an annotation value.
#
# Positional command-line arguments:
#   1. input          - path to a table with a header line; the columns read here are
#                       value, cumulativeTiTv, numVariants and GT
#   2. outputDir      - prefix for the output files; it is concatenated with the file
#                       name without a separator, so it must already end in "/"
#   3. annotationName - annotation label used for the x axis, the title and the file names
#
# Two PDFs are written, one per filtering direction:
#   <outputDir>cumulativeTiTv.<annotationName>.GTfilter.pdf  (rows with GT == 1)
#   <outputDir>cumulativeTiTv.<annotationName>.LTfilter.pdf  (rows with GT == 0)

args <- commandArgs(TRUE)
verbose = TRUE

input = args[1]
outputDir = args[2]
annotationName = args[3]

# Named 'cumulative' rather than 'c' so the base function c() is not shadowed.
cumulative <- read.table(input, header=T)

#
# Plot cumulative Ti/Tv ratio as a function of the annotation
#

# Only rows with more than 1000 variants are plotted.
gt = cumulative[cumulative$GT==1 & cumulative$numVariants>1000,]
lt = cumulative[cumulative$GT==0 & cumulative$numVariants>1000,]

# Rows flagged GT == 1: "filter out SNPs with annotation > x".
outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".GTfilter.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(gt$value,gt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"> x",sep=" "),pch=20);
dev.off()

# Rows flagged GT == 0: "filter out SNPs with annotation < x".
# This plot previously reused the ".GTfilter.pdf" name and overwrote the plot above;
# it now gets its own ".LTfilter.pdf" file.
outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".LTfilter.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(lt$value,lt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"< x",sep=" "),pch=20);
dev.off()

View File

@ -48,8 +48,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
/////////////////////////////
// Command Line Arguments
/////////////////////////////
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeAnnotations/";
@Argument(fullName = "output_prefix", shortName = "output", doc = "The output path and name to prepend to all plots and intermediate data files", required = false)
private String OUTPUT_PREFIX = "analyzeAnnotations/";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
@ -73,18 +73,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Create the output directory and set up the path variables.
 *
 * Makes sure the directory named by OUTPUT_DIR exists, creating it (and any missing
 * parent directories) if necessary, then appends a trailing "/" to OUTPUT_DIR and
 * PATH_TO_RESOURCES when they do not already end with one.
 *
 * @throws RuntimeException if the output directory does not exist and cannot be created
 */
public void initialize() {
    // Create the output directory where all the data tables and plots will go.
    // This is done in-process rather than by spawning "mkdir": the spawned process was
    // never waited on (so the directory might not exist yet when the first file is
    // opened), its exit status was ignored, and a path containing whitespace was split
    // into several arguments.
    final java.io.File outputDirectory = new java.io.File(OUTPUT_DIR);
    // mkdirs() returns false when the directory already exists, so a failure is only
    // reported if the path is still not a directory afterwards.
    if( !outputDirectory.mkdirs() && !outputDirectory.isDirectory() ) {
        throw new RuntimeException("Couldn't create directory: " + OUTPUT_DIR);
    }

    if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; }
    if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
}
@ -128,6 +118,6 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
public void onTraversalDone( Integer sum ) {
// For each annotation, decide how to cut up the data, output intermediate cumulative p(true) tables, and call RScript to plot the tables
dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_DIR, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN);
dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_PREFIX, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN);
}
}

View File

@ -138,55 +138,19 @@ public class AnnotationDataManager {
}
}
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_DIR,
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_PREFIX,
final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) {
System.out.println( "\nExecuting RScript commands:" );
for( String annotationKey: dataFull.keySet() ) {
/*
PrintStream output;
try {
output = new PrintStream(OUTPUT_DIR + annotationKey + ".cumulative.dat");
output = new PrintStream(OUTPUT_PREFIX + annotationKey + ".dat");
} catch (FileNotFoundException e) {
throw new StingException("Can't create intermediate output annotation data file.");
}
// Output a header line
output.println("value\tcumulativeTiTv\tnumVariants\tGT");
// Filter SNPs greater than this annotation value
int numTi = 0;
int numTv = 0;
for( AnnotationDatum datum : data.get( annotationKey ) ) {
numTi += datum.numTransitions;
numTv += datum.numTransversions;
float titv;
if( numTv == 0) { titv = 0.0f; }
else { titv = ((float) numTi) / ((float) numTv); }
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) +"\t1");
}
// Filter SNPs less than this annotation value
numTi = 0;
numTv = 0;
Iterator<AnnotationDatum> iter = data.get( annotationKey ).descendingIterator();
while( iter.hasNext() ) {
final AnnotationDatum datum = iter.next();
numTi += datum.numTransitions;
numTv += datum.numTransversions;
float titv;
if( numTv == 0) { titv = 0.0f; }
else { titv = ((float) numTi) / ((float) numTv); }
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) +"\t0");
}
*/
PrintStream output;
try {
output = new PrintStream(OUTPUT_DIR + annotationKey + ".binned.dat");
} catch (FileNotFoundException e) {
throw new StingException("Can't create intermediate output annotation data file.");
throw new StingException("Can't create intermediate output annotation data file. Does the output directory exist? " +
OUTPUT_PREFIX + annotationKey + ".dat");
}
// Output a header line
@ -266,30 +230,17 @@ public class AnnotationDataManager {
output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv)+ "\t2");
}
/*
System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " +
OUTPUT_DIR + annotationKey + ".cumulative.dat" + " " + OUTPUT_DIR + " " + annotationKey);
try {
Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " +
OUTPUT_DIR + annotationKey + ".cumulative.dat"+ " " + OUTPUT_DIR + " " + annotationKey);
} catch (Exception e) {
throw new StingException("Unable to execute RScript command");
}
*/
output.close();
System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " +
OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey +
" " + MIN_VARIANTS_PER_BIN);
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN);
// Execute the RScript command to plot the table of TiTv values
try {
final Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " +
OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey +
" " + MIN_VARIANTS_PER_BIN);
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN);
} catch (Exception e) {
throw new StingException("Unable to execute RScript command");
throw new StingException( "Unable to execute RScript command" );
}
}
}