Added a playground concordance analyzer for summarizing VariantEval across a group.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2867 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-02-22 20:28:52 +00:00
parent a640bd2d79
commit 3738b76320
5 changed files with 346 additions and 0 deletions

View File

@ -0,0 +1,61 @@
#!/usr/bin/env Rscript
args <- commandArgs(TRUE)
base_name = args[1]
input = args[2]
d <- read.table(input, header=T)
# separate the data into filtered and unfiltered
d.filtered <- d[d$filter_type=="filtered",]
d.unfiltered <- d[d$filter_type=="unfiltered",]
if (nrow(d.filtered) > 0) {
d.display <- d.filtered
} else {
d.display <- d.unfiltered
}
#
# Plot histograms of the known versus novel Ti/Tv
#
outfile = paste(base_name, ".histograms.png", sep="")
if (nrow(d.filtered) > 0) {
nFilterTypes <- 2
} else {
nFilterTypes <- 1
}
png(outfile, width=600, height=(300 * nFilterTypes))
par(cex=1.1, mfrow=c(1 * nFilterTypes,2))
nbreaks <- 20
color <- "grey"
xlim <- c(0,4)
hist(d.unfiltered$known_titv, nbreaks, col=color, xlim=xlim)
hist(d.unfiltered$novel_titv, nbreaks, col=color, xlim=xlim)
if (nrow(d.filtered) > 0) {
hist(d.filtered$known_titv, nbreaks, col=color, xlim=xlim)
hist(d.filtered$novel_titv, nbreaks, col=color, xlim=xlim)
}
dev.off()
#
# Plot samples in order of novel Ti/Tv versus known Ti/Tv
#
outfile = paste(base_name, ".novel_vs_known_titv.png", sep="")
png(outfile, width=600, height=600)
d.display <- d.display[order(d.display$novel_titv),]
plot(1:length(d.display$known_titv),d.display$known_titv,type="b",col="blue",ylim=c(0,4), xlab="Sample #", ylab="Ti / Tv")
points(1:length(d.display$novel_titv),d.display$novel_titv,type="b",col="red",ylim=c(0,4))
legend("bottomright", c("known","novel"), col=c("blue","red"), pch=21)
dev.off()

View File

@ -0,0 +1,185 @@
package org.broadinstitute.sting.playground.analyzeconcordance;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.PathUtils;
import org.broadinstitute.sting.playground.utils.ProcessUtils;
import org.apache.log4j.Logger;
import java.io.*;
import java.util.*;
/**
* Compares results of VariantEval across a population or a case/control group.
*/
public class AnalyzeConcordance extends CommandLineProgram {
@Argument(fullName = "group_name", shortName = "groupName", doc = "The name of the group which will be prefixed output files", required = false)
private String baseName = "analyze_concordance";
@Argument(fullName = "eval_list", shortName = "evalList", doc = "The input list of unfiltered eval files to analyze", required = true)
private String evalListFile = null;
@Argument(fullName = "filtered_eval_list", shortName = "filteredEvalList", doc = "The input list of filtered eval files to analyze", required = false)
private String filteredEvalListFile = null;
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String outputDir = "analyzeConcordance";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript", required = false)
private String pathToRscript = "env Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting analyze concordance R scripts", required = false)
private String pathToResources = "R" + File.separator + "analyzeConcordance";
private enum EvalFilterType {
UNFILTERED, FILTERED
}
private static final AnalyzeConcordanceField[] ANALYZE_CONCORDANCE_FIELDS = AnalyzeConcordanceField.values();
private String evalDataFile;
private List<String[]> data = new ArrayList<String[]>();
private static Logger logger = Logger.getLogger(AnalyzeConcordance.class);
protected int execute() {
int result;
try {
createOutputDirectory();
// initialize all the data from the csv file and allocate the list of covariates
logger.info("Reading in input csv file...");
initializeData();
logger.info("...Done!");
// output data tables for Rscript to read in
logger.info("Writing out intermediate tables for R...");
writeDataTables();
logger.info("...Done!");
// perform the analysis using Rscript and output the plots
logger.info("Calling analysis R scripts and writing out figures...");
result = callRScripts();
logger.info("...Done!");
// perform the analysis using Rscript and output the plots
logger.info("Generating html report...");
generateHtmlReport();
logger.info("...Done!");
} catch (StingException se) {
throw se;
} catch (Exception e) {
throw new StingException("Error analyzing concordance", e);
}
return result;
}
private void createOutputDirectory() {
// create the output directory where all the data tables and plots will go
File outputDir = new File(this.outputDir);
if (!outputDir.exists() && !outputDir.mkdirs()) {
throw new StingException("Couldn't create directory: " + this.outputDir);
}
}
private void initializeData() throws FileNotFoundException {
// add the column headers to the data
addHeader();
// read the list of unfiltered eval files
addEvalListFile(EvalFilterType.UNFILTERED, new File(evalListFile));
// if provided, read the list of filtered eval files
if (filteredEvalListFile != null) {
addEvalListFile(EvalFilterType.FILTERED, new File(filteredEvalListFile));
}
}
private void addHeader() {
String[] headers = new String[ANALYZE_CONCORDANCE_FIELDS.length + 2];
int column = 0;
headers[column++] = "eval_id";
headers[column++] = "filter_type";
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
headers[column++] = field.getColumnHeader();
}
data.add(headers);
}
private void addEvalListFile(EvalFilterType filterType, File evalListFile) throws FileNotFoundException {
for (String line : new xReadLines(evalListFile)) {
String[] parts = line.split("\t");
addEvalFile(parts[0], filterType, new File(parts[1]));
}
}
private void addEvalFile(String evalID, EvalFilterType filterType, File evalFile) throws FileNotFoundException {
SortedMap<AnalyzeConcordanceField, String> fieldValues = new TreeMap<AnalyzeConcordanceField, String>();
for (String line : new xReadLines(evalFile)) {
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
String value = field.parseLine(line);
if (value != null) {
fieldValues.put(field, value);
break; // continue to the next line.
}
}
}
String[] values = new String[ANALYZE_CONCORDANCE_FIELDS.length + 2];
int column = 0;
values[column++] = evalID;
values[column++] = filterType.toString().toLowerCase();
// get all the values, including null if for some reason a value wasn't found
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
values[column++] = fieldValues.get(field);
}
data.add(values);
}
private void writeDataTables() throws FileNotFoundException {
evalDataFile = baseName + ".eval_data.tsv";
// Create a PrintStream
PrintStream output = new PrintStream(new File(outputDir, evalDataFile));
for (String[] line : data) {
output.println(Utils.join("\t", line));
}
output.close();
}
private int callRScripts() {
String command = pathToRscript + " "
+ new File(pathToResources, "analyzeConcordance.R") + " "
+ new File(outputDir, baseName) + " "
+ new File(outputDir, evalDataFile);
return ProcessUtils.runCommandAndWait(command);
}
private void generateHtmlReport() throws FileNotFoundException {
// TODO: Enhance the reports
PrintStream output = new PrintStream(new File(outputDir, "report.html"));
output.println("<html><body>");
for (File pngFile : new File(outputDir).listFiles(new PathUtils.ExtensionFilter("png"))) {
output.println("<div><img src=\"" + pngFile.getName() + "\"/></div>");
}
output.println("</body></html>");
output.close();
}
public static void main(String[] argv) {
try {
AnalyzeConcordance instance = new AnalyzeConcordance();
start(instance, argv);
System.exit(CommandLineProgram.result);
} catch (Exception e) {
exitSystemWithError(e);
}
}
}

View File

@ -0,0 +1,41 @@
package org.broadinstitute.sting.playground.analyzeconcordance;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
* Created by IntelliJ IDEA.
* User: kshakir
* Date: Feb 11, 2010
*/
public enum AnalyzeConcordanceField {
N_BASES_COVERED("all_bases", "all,summary,variant_counts", "n bases covered"),
ALL_DBSNP_RATE("all_dbsnp", "all,summary,db_coverage", "dbsnp_rate"),
ALL_VARIANT_COUNT("all_variants", "all,summary,variant_counts", "variants"),
ALL_TITV_RATIO("all_titv", "all,summary,transitions_transversions", "ratio"),
KNOWN_VARIANT_COUNT("known_variants", "known,summary,variant_counts", "variants"),
KNOWN_TITV_RATIO("known_titv", "known,summary,transitions_transversions", "ratio"),
NOVEL_VARIANT_COUNT("novel_variants", "novel,summary,variant_counts", "variants"),
NOVEL_TITV_RATIO("novel_titv", "novel,summary,transitions_transversions", "ratio");
private String columnHeader;
private Pattern pattern;
private AnalyzeConcordanceField(String columnHeader, String evalHeader, String analysis) {
this.columnHeader = columnHeader;
String lineRegex = evalHeader + " {2,}" + analysis + " {2,}([0-9.]+).*";
this.pattern = Pattern.compile(lineRegex);
}
public String getColumnHeader() {
return this.columnHeader;
}
public String parseLine(String line) {
Matcher matcher = this.pattern.matcher(line);
if (!matcher.matches())
return null;
return matcher.group(1);
}
}

View File

@ -0,0 +1,41 @@
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.StingException;
import org.apache.log4j.Logger;
/**
* A set of utilities for managing external processes.
*/
public class ProcessUtils {
private static Logger logger = Logger.getLogger(ProcessUtils.class);
/**
* Runs a command line and returns the result code.
* @param command Command line to execute.
* @return Result code of the command.
*/
public static int runCommandAndWait(String command) {
try {
logger.debug("Running command: " + command);
Process p = Runtime.getRuntime().exec(command);
int result = p.waitFor();
if (logger.isDebugEnabled()) {
for (String line : new xReadLines(p.getInputStream())) {
logger.debug("command: " + line);
}
for (String line : new xReadLines(p.getErrorStream())) {
logger.error("command: " + line);
}
}
logger.debug("Command exited with result: " + result);
return result;
} catch (Exception e) {
throw new StingException("Error running command:" + command, e);
}
}
}

View File

@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Currently built via:
ant playground package -Dexecutable=AnalyzeConcordance
-->
<package name="AnalyzeConcordance">
<executable name="AnalyzeConcordance">
<main-class name="org.broadinstitute.sting.playground.analyzeconcordance.AnalyzeConcordance" />
<resource-bundle file="StingText.properties" />
<dependencies>
<class name="org.broadinstitute.sting.playground.analyzeconcordance.AnalyzeConcordance" />
</dependencies>
</executable>
<resources>
<!-- Supplemental scripts for graph generation, etc. -->
<file name="R/analyzeConcordance/analyzeConcordance.R" />
</resources>
</package>