Added a playground concordance analyzer for summarizing VariantEval across a group.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2867 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a640bd2d79
commit
3738b76320
|
|
@ -0,0 +1,61 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
|
||||||
|
args <- commandArgs(TRUE)
|
||||||
|
|
||||||
|
base_name = args[1]
|
||||||
|
input = args[2]
|
||||||
|
|
||||||
|
d <- read.table(input, header=T)
|
||||||
|
# separate the data into filtered and unfiltered
|
||||||
|
|
||||||
|
d.filtered <- d[d$filter_type=="filtered",]
|
||||||
|
d.unfiltered <- d[d$filter_type=="unfiltered",]
|
||||||
|
|
||||||
|
if (nrow(d.filtered) > 0) {
|
||||||
|
d.display <- d.filtered
|
||||||
|
} else {
|
||||||
|
d.display <- d.unfiltered
|
||||||
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Plot histograms of the known versus novel Ti/Tv
|
||||||
|
#
|
||||||
|
|
||||||
|
outfile = paste(base_name, ".histograms.png", sep="")
|
||||||
|
|
||||||
|
if (nrow(d.filtered) > 0) {
|
||||||
|
nFilterTypes <- 2
|
||||||
|
} else {
|
||||||
|
nFilterTypes <- 1
|
||||||
|
}
|
||||||
|
|
||||||
|
png(outfile, width=600, height=(300 * nFilterTypes))
|
||||||
|
par(cex=1.1, mfrow=c(1 * nFilterTypes,2))
|
||||||
|
nbreaks <- 20
|
||||||
|
color <- "grey"
|
||||||
|
xlim <- c(0,4)
|
||||||
|
|
||||||
|
hist(d.unfiltered$known_titv, nbreaks, col=color, xlim=xlim)
|
||||||
|
hist(d.unfiltered$novel_titv, nbreaks, col=color, xlim=xlim)
|
||||||
|
|
||||||
|
if (nrow(d.filtered) > 0) {
|
||||||
|
hist(d.filtered$known_titv, nbreaks, col=color, xlim=xlim)
|
||||||
|
hist(d.filtered$novel_titv, nbreaks, col=color, xlim=xlim)
|
||||||
|
}
|
||||||
|
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
#
|
||||||
|
# Plot samples in order of novel Ti/Tv versus known Ti/Tv
|
||||||
|
#
|
||||||
|
|
||||||
|
outfile = paste(base_name, ".novel_vs_known_titv.png", sep="")
|
||||||
|
|
||||||
|
png(outfile, width=600, height=600)
|
||||||
|
|
||||||
|
d.display <- d.display[order(d.display$novel_titv),]
|
||||||
|
plot(1:length(d.display$known_titv),d.display$known_titv,type="b",col="blue",ylim=c(0,4), xlab="Sample #", ylab="Ti / Tv")
|
||||||
|
points(1:length(d.display$novel_titv),d.display$novel_titv,type="b",col="red",ylim=c(0,4))
|
||||||
|
legend("bottomright", c("known","novel"), col=c("blue","red"), pch=21)
|
||||||
|
|
||||||
|
dev.off()
|
||||||
|
|
@ -0,0 +1,185 @@
|
||||||
|
package org.broadinstitute.sting.playground.analyzeconcordance;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.xReadLines;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.PathUtils;
|
||||||
|
import org.broadinstitute.sting.playground.utils.ProcessUtils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares results of VariantEval across a population or a case/control group.
|
||||||
|
*/
|
||||||
|
public class AnalyzeConcordance extends CommandLineProgram {
|
||||||
|
|
||||||
|
@Argument(fullName = "group_name", shortName = "groupName", doc = "The name of the group which will be prefixed output files", required = false)
|
||||||
|
private String baseName = "analyze_concordance";
|
||||||
|
@Argument(fullName = "eval_list", shortName = "evalList", doc = "The input list of unfiltered eval files to analyze", required = true)
|
||||||
|
private String evalListFile = null;
|
||||||
|
@Argument(fullName = "filtered_eval_list", shortName = "filteredEvalList", doc = "The input list of filtered eval files to analyze", required = false)
|
||||||
|
private String filteredEvalListFile = null;
|
||||||
|
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||||
|
private String outputDir = "analyzeConcordance";
|
||||||
|
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript", required = false)
|
||||||
|
private String pathToRscript = "env Rscript";
|
||||||
|
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting analyze concordance R scripts", required = false)
|
||||||
|
private String pathToResources = "R" + File.separator + "analyzeConcordance";
|
||||||
|
|
||||||
|
private enum EvalFilterType {
|
||||||
|
UNFILTERED, FILTERED
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final AnalyzeConcordanceField[] ANALYZE_CONCORDANCE_FIELDS = AnalyzeConcordanceField.values();
|
||||||
|
|
||||||
|
private String evalDataFile;
|
||||||
|
private List<String[]> data = new ArrayList<String[]>();
|
||||||
|
|
||||||
|
private static Logger logger = Logger.getLogger(AnalyzeConcordance.class);
|
||||||
|
|
||||||
|
protected int execute() {
|
||||||
|
int result;
|
||||||
|
|
||||||
|
try {
|
||||||
|
createOutputDirectory();
|
||||||
|
|
||||||
|
// initialize all the data from the csv file and allocate the list of covariates
|
||||||
|
logger.info("Reading in input csv file...");
|
||||||
|
initializeData();
|
||||||
|
logger.info("...Done!");
|
||||||
|
|
||||||
|
// output data tables for Rscript to read in
|
||||||
|
logger.info("Writing out intermediate tables for R...");
|
||||||
|
writeDataTables();
|
||||||
|
logger.info("...Done!");
|
||||||
|
|
||||||
|
// perform the analysis using Rscript and output the plots
|
||||||
|
logger.info("Calling analysis R scripts and writing out figures...");
|
||||||
|
result = callRScripts();
|
||||||
|
logger.info("...Done!");
|
||||||
|
|
||||||
|
// perform the analysis using Rscript and output the plots
|
||||||
|
logger.info("Generating html report...");
|
||||||
|
generateHtmlReport();
|
||||||
|
logger.info("...Done!");
|
||||||
|
|
||||||
|
} catch (StingException se) {
|
||||||
|
throw se;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new StingException("Error analyzing concordance", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createOutputDirectory() {
|
||||||
|
// create the output directory where all the data tables and plots will go
|
||||||
|
File outputDir = new File(this.outputDir);
|
||||||
|
if (!outputDir.exists() && !outputDir.mkdirs()) {
|
||||||
|
throw new StingException("Couldn't create directory: " + this.outputDir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeData() throws FileNotFoundException {
|
||||||
|
// add the column headers to the data
|
||||||
|
addHeader();
|
||||||
|
|
||||||
|
// read the list of unfiltered eval files
|
||||||
|
addEvalListFile(EvalFilterType.UNFILTERED, new File(evalListFile));
|
||||||
|
|
||||||
|
// if provided, read the list of filtered eval files
|
||||||
|
if (filteredEvalListFile != null) {
|
||||||
|
addEvalListFile(EvalFilterType.FILTERED, new File(filteredEvalListFile));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addHeader() {
|
||||||
|
String[] headers = new String[ANALYZE_CONCORDANCE_FIELDS.length + 2];
|
||||||
|
int column = 0;
|
||||||
|
headers[column++] = "eval_id";
|
||||||
|
headers[column++] = "filter_type";
|
||||||
|
|
||||||
|
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
|
||||||
|
headers[column++] = field.getColumnHeader();
|
||||||
|
}
|
||||||
|
|
||||||
|
data.add(headers);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addEvalListFile(EvalFilterType filterType, File evalListFile) throws FileNotFoundException {
|
||||||
|
for (String line : new xReadLines(evalListFile)) {
|
||||||
|
String[] parts = line.split("\t");
|
||||||
|
addEvalFile(parts[0], filterType, new File(parts[1]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addEvalFile(String evalID, EvalFilterType filterType, File evalFile) throws FileNotFoundException {
|
||||||
|
SortedMap<AnalyzeConcordanceField, String> fieldValues = new TreeMap<AnalyzeConcordanceField, String>();
|
||||||
|
|
||||||
|
for (String line : new xReadLines(evalFile)) {
|
||||||
|
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
|
||||||
|
String value = field.parseLine(line);
|
||||||
|
if (value != null) {
|
||||||
|
fieldValues.put(field, value);
|
||||||
|
break; // continue to the next line.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] values = new String[ANALYZE_CONCORDANCE_FIELDS.length + 2];
|
||||||
|
int column = 0;
|
||||||
|
values[column++] = evalID;
|
||||||
|
values[column++] = filterType.toString().toLowerCase();
|
||||||
|
|
||||||
|
// get all the values, including null if for some reason a value wasn't found
|
||||||
|
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
|
||||||
|
values[column++] = fieldValues.get(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
data.add(values);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeDataTables() throws FileNotFoundException {
|
||||||
|
evalDataFile = baseName + ".eval_data.tsv";
|
||||||
|
// Create a PrintStream
|
||||||
|
PrintStream output = new PrintStream(new File(outputDir, evalDataFile));
|
||||||
|
for (String[] line : data) {
|
||||||
|
output.println(Utils.join("\t", line));
|
||||||
|
}
|
||||||
|
output.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int callRScripts() {
|
||||||
|
String command = pathToRscript + " "
|
||||||
|
+ new File(pathToResources, "analyzeConcordance.R") + " "
|
||||||
|
+ new File(outputDir, baseName) + " "
|
||||||
|
+ new File(outputDir, evalDataFile);
|
||||||
|
|
||||||
|
return ProcessUtils.runCommandAndWait(command);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void generateHtmlReport() throws FileNotFoundException {
|
||||||
|
// TODO: Enhance the reports
|
||||||
|
PrintStream output = new PrintStream(new File(outputDir, "report.html"));
|
||||||
|
output.println("<html><body>");
|
||||||
|
for (File pngFile : new File(outputDir).listFiles(new PathUtils.ExtensionFilter("png"))) {
|
||||||
|
output.println("<div><img src=\"" + pngFile.getName() + "\"/></div>");
|
||||||
|
}
|
||||||
|
output.println("</body></html>");
|
||||||
|
output.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] argv) {
|
||||||
|
try {
|
||||||
|
AnalyzeConcordance instance = new AnalyzeConcordance();
|
||||||
|
start(instance, argv);
|
||||||
|
System.exit(CommandLineProgram.result);
|
||||||
|
} catch (Exception e) {
|
||||||
|
exitSystemWithError(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
package org.broadinstitute.sting.playground.analyzeconcordance;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: kshakir
|
||||||
|
* Date: Feb 11, 2010
|
||||||
|
*/
|
||||||
|
public enum AnalyzeConcordanceField {
|
||||||
|
N_BASES_COVERED("all_bases", "all,summary,variant_counts", "n bases covered"),
|
||||||
|
ALL_DBSNP_RATE("all_dbsnp", "all,summary,db_coverage", "dbsnp_rate"),
|
||||||
|
ALL_VARIANT_COUNT("all_variants", "all,summary,variant_counts", "variants"),
|
||||||
|
ALL_TITV_RATIO("all_titv", "all,summary,transitions_transversions", "ratio"),
|
||||||
|
KNOWN_VARIANT_COUNT("known_variants", "known,summary,variant_counts", "variants"),
|
||||||
|
KNOWN_TITV_RATIO("known_titv", "known,summary,transitions_transversions", "ratio"),
|
||||||
|
NOVEL_VARIANT_COUNT("novel_variants", "novel,summary,variant_counts", "variants"),
|
||||||
|
NOVEL_TITV_RATIO("novel_titv", "novel,summary,transitions_transversions", "ratio");
|
||||||
|
|
||||||
|
private String columnHeader;
|
||||||
|
private Pattern pattern;
|
||||||
|
|
||||||
|
private AnalyzeConcordanceField(String columnHeader, String evalHeader, String analysis) {
|
||||||
|
this.columnHeader = columnHeader;
|
||||||
|
|
||||||
|
String lineRegex = evalHeader + " {2,}" + analysis + " {2,}([0-9.]+).*";
|
||||||
|
this.pattern = Pattern.compile(lineRegex);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getColumnHeader() {
|
||||||
|
return this.columnHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String parseLine(String line) {
|
||||||
|
Matcher matcher = this.pattern.matcher(line);
|
||||||
|
if (!matcher.matches())
|
||||||
|
return null;
|
||||||
|
return matcher.group(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
package org.broadinstitute.sting.playground.utils;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.xReadLines;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A set of utilities for managing external processes.
|
||||||
|
*/
|
||||||
|
public class ProcessUtils {
|
||||||
|
private static Logger logger = Logger.getLogger(ProcessUtils.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs a command line and returns the result code.
|
||||||
|
* @param command Command line to execute.
|
||||||
|
* @return Result code of the command.
|
||||||
|
*/
|
||||||
|
public static int runCommandAndWait(String command) {
|
||||||
|
try {
|
||||||
|
logger.debug("Running command: " + command);
|
||||||
|
|
||||||
|
Process p = Runtime.getRuntime().exec(command);
|
||||||
|
int result = p.waitFor();
|
||||||
|
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
for (String line : new xReadLines(p.getInputStream())) {
|
||||||
|
logger.debug("command: " + line);
|
||||||
|
}
|
||||||
|
for (String line : new xReadLines(p.getErrorStream())) {
|
||||||
|
logger.error("command: " + line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug("Command exited with result: " + result);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new StingException("Error running command:" + command, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Currently built via:
|
||||||
|
ant playground package -Dexecutable=AnalyzeConcordance
|
||||||
|
-->
|
||||||
|
<package name="AnalyzeConcordance">
|
||||||
|
<executable name="AnalyzeConcordance">
|
||||||
|
<main-class name="org.broadinstitute.sting.playground.analyzeconcordance.AnalyzeConcordance" />
|
||||||
|
<resource-bundle file="StingText.properties" />
|
||||||
|
<dependencies>
|
||||||
|
<class name="org.broadinstitute.sting.playground.analyzeconcordance.AnalyzeConcordance" />
|
||||||
|
</dependencies>
|
||||||
|
</executable>
|
||||||
|
<resources>
|
||||||
|
<!-- Supplemental scripts for graph generation, etc. -->
|
||||||
|
<file name="R/analyzeConcordance/analyzeConcordance.R" />
|
||||||
|
</resources>
|
||||||
|
</package>
|
||||||
Loading…
Reference in New Issue