--output_dir has been changed to --output_prefix to give the user more control over the names of the resulting mass of files in AnalyzeAnnotations. The fontsize of the axes is increased. Cumulative filtering plots are removed since the binned filtering plots are much more useful.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2700 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
df112e64b8
commit
4bcdab580c
|
|
@ -4,9 +4,8 @@ args <- commandArgs(TRUE)
|
|||
verbose = TRUE
|
||||
|
||||
input = args[1]
|
||||
outputDir = args[2]
|
||||
annotationName = args[3]
|
||||
minBinCutoff = as.numeric(args[4])
|
||||
annotationName = args[2]
|
||||
minBinCutoff = as.numeric(args[3])
|
||||
|
||||
|
||||
c <- read.table(input, header=T)
|
||||
|
|
@ -25,10 +24,10 @@ ymax = max(d$titv)
|
|||
xmin = min(d$value)
|
||||
xmax = max(d$value)
|
||||
|
||||
outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, ".TiTv.", annotationName, ".pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
|
||||
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
|
||||
ma = all[all$value > m,]
|
||||
|
|
@ -43,10 +42,10 @@ points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
|
|||
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
|
||||
dev.off()
|
||||
|
||||
outfile = paste(outputDir, "binnedTiTv_log.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, ".TiTv_log.", annotationName, ".pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
|
||||
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2)
|
||||
abline(v=m75,lty=2)
|
||||
|
|
@ -57,9 +56,9 @@ legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(2
|
|||
dev.off()
|
||||
|
||||
|
||||
outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, "TiTv_hist.", annotationName, ".pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h",xaxt="n");
|
||||
plot(all$value,all$numVariants,xlab=annotationName,ylab="num variants in bin",type="h",xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
dev.off()
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
#!/broad/tools/apps/R-2.6.0/bin/Rscript
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
input = args[1]
|
||||
outputDir = args[2]
|
||||
annotationName = args[3]
|
||||
|
||||
|
||||
c <- read.table(input, header=T)
|
||||
|
||||
#
|
||||
# Plot cumulative Ti/Tv ratio as a function of the annotation
|
||||
#
|
||||
|
||||
gt = c[c$GT==1 & c$numVariants>1000,]
|
||||
lt = c[c$GT==0 & c$numVariants>1000,]
|
||||
|
||||
outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".GTfilter.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(gt$value,gt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"> x",sep=" "),pch=20);
|
||||
dev.off()
|
||||
|
||||
outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".GTfilter.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(lt$value,lt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"< x",sep=" "),pch=20);
|
||||
dev.off()
|
||||
|
|
@ -48,8 +48,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
/////////////////////////////
|
||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||
private String OUTPUT_DIR = "analyzeAnnotations/";
|
||||
@Argument(fullName = "output_prefix", shortName = "output", doc = "The output path and name to prepend to all plots and intermediate data files", required = false)
|
||||
private String OUTPUT_PREFIX = "analyzeAnnotations/";
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||
|
|
@ -73,18 +73,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Create the output directory and setup the path variables
|
||||
*/
|
||||
public void initialize() {
|
||||
|
||||
// create the output directory where all the data tables and plots will go
|
||||
try {
|
||||
Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Couldn't create directory: " + OUTPUT_DIR);
|
||||
}
|
||||
if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; }
|
||||
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
|
||||
}
|
||||
|
||||
|
|
@ -128,6 +118,6 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
public void onTraversalDone( Integer sum ) {
|
||||
|
||||
// For each annotation, decide how to cut up the data, output intermediate cumulative p(true) tables, and call RScript to plot the tables
|
||||
dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_DIR, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN);
|
||||
dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_PREFIX, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN);
|
||||
}
|
||||
}
|
||||
|
|
@ -138,55 +138,19 @@ public class AnnotationDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_DIR,
|
||||
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_PREFIX,
|
||||
final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) {
|
||||
|
||||
System.out.println( "\nExecuting RScript commands:" );
|
||||
|
||||
for( String annotationKey: dataFull.keySet() ) {
|
||||
|
||||
/*
|
||||
PrintStream output;
|
||||
try {
|
||||
output = new PrintStream(OUTPUT_DIR + annotationKey + ".cumulative.dat");
|
||||
output = new PrintStream(OUTPUT_PREFIX + annotationKey + ".dat");
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Can't create intermediate output annotation data file.");
|
||||
}
|
||||
// Output a header line
|
||||
output.println("value\tcumulativeTiTv\tnumVariants\tGT");
|
||||
|
||||
// Filter SNPs greater than this annotation value
|
||||
int numTi = 0;
|
||||
int numTv = 0;
|
||||
for( AnnotationDatum datum : data.get( annotationKey ) ) {
|
||||
numTi += datum.numTransitions;
|
||||
numTv += datum.numTransversions;
|
||||
float titv;
|
||||
if( numTv == 0) { titv = 0.0f; }
|
||||
else { titv = ((float) numTi) / ((float) numTv); }
|
||||
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) +"\t1");
|
||||
|
||||
}
|
||||
|
||||
// Filter SNPs less than this annotation value
|
||||
numTi = 0;
|
||||
numTv = 0;
|
||||
Iterator<AnnotationDatum> iter = data.get( annotationKey ).descendingIterator();
|
||||
while( iter.hasNext() ) {
|
||||
final AnnotationDatum datum = iter.next();
|
||||
numTi += datum.numTransitions;
|
||||
numTv += datum.numTransversions;
|
||||
float titv;
|
||||
if( numTv == 0) { titv = 0.0f; }
|
||||
else { titv = ((float) numTi) / ((float) numTv); }
|
||||
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) +"\t0");
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
PrintStream output;
|
||||
try {
|
||||
output = new PrintStream(OUTPUT_DIR + annotationKey + ".binned.dat");
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Can't create intermediate output annotation data file.");
|
||||
throw new StingException("Can't create intermediate output annotation data file. Does the output directory exist? " +
|
||||
OUTPUT_PREFIX + annotationKey + ".dat");
|
||||
}
|
||||
|
||||
// Output a header line
|
||||
|
|
@ -266,30 +230,17 @@ public class AnnotationDataManager {
|
|||
output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv)+ "\t2");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " +
|
||||
OUTPUT_DIR + annotationKey + ".cumulative.dat" + " " + OUTPUT_DIR + " " + annotationKey);
|
||||
|
||||
try {
|
||||
Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " +
|
||||
OUTPUT_DIR + annotationKey + ".cumulative.dat"+ " " + OUTPUT_DIR + " " + annotationKey);
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Unable to execute RScript command");
|
||||
}
|
||||
*/
|
||||
output.close();
|
||||
|
||||
System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " +
|
||||
OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey +
|
||||
" " + MIN_VARIANTS_PER_BIN);
|
||||
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN);
|
||||
|
||||
// Execute the RScript command to plot the table of TiTv values
|
||||
try {
|
||||
final Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " +
|
||||
OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey +
|
||||
" " + MIN_VARIANTS_PER_BIN);
|
||||
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN);
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Unable to execute RScript command");
|
||||
throw new StingException( "Unable to execute RScript command" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue