Added a playground concordance analyzer for summarizing VariantEval across a group.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2867 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a640bd2d79
commit
3738b76320
|
|
@ -0,0 +1,61 @@
|
|||
#!/usr/bin/env Rscript

# Plots summary figures for a group of VariantEval results.
#
# Usage: analyzeConcordance.R <base_name> <input_table>
#
#   base_name    prefix (may include a directory) for the generated PNG files
#   input_table  table with a header row containing at least the columns
#                filter_type, known_titv and novel_titv
#
# Outputs:
#   <base_name>.histograms.png           histograms of known and novel Ti/Tv
#   <base_name>.novel_vs_known_titv.png  per-sample known and novel Ti/Tv,
#                                        ordered by novel Ti/Tv

args <- commandArgs(TRUE)

if (length(args) < 2) {
  stop("Usage: analyzeConcordance.R <base_name> <input_table>")
}

base_name <- args[1]
input <- args[2]

# Accept the literal "null" as a missing value in addition to NA so that a
# single missing entry does not turn a whole numeric column into text.
d <- read.table(input, header=TRUE, na.strings=c("NA", "null"))

# separate the data into filtered and unfiltered

d.filtered <- d[d$filter_type == "filtered",]
d.unfiltered <- d[d$filter_type == "unfiltered",]

if (nrow(d.unfiltered) == 0) {
  stop("No unfiltered rows found in ", input)
}

has_filtered <- nrow(d.filtered) > 0

# The per-sample plot shows the filtered calls when they are available.
if (has_filtered) {
  d.display <- d.filtered
} else {
  d.display <- d.unfiltered
}

#
# Plot histograms of the known versus novel Ti/Tv
#

outfile <- paste(base_name, ".histograms.png", sep="")

# One row of histograms per filter type present in the input.
if (has_filtered) {
  nFilterTypes <- 2
} else {
  nFilterTypes <- 1
}

png(outfile, width=600, height=(300 * nFilterTypes))
par(cex=1.1, mfrow=c(1 * nFilterTypes, 2))
nbreaks <- 20
color <- "grey"
xlim <- c(0, 4)
titv_label <- "Ti / Tv"

hist(d.unfiltered$known_titv, nbreaks, col=color, xlim=xlim,
     main="Known Ti/Tv (unfiltered)", xlab=titv_label)
hist(d.unfiltered$novel_titv, nbreaks, col=color, xlim=xlim,
     main="Novel Ti/Tv (unfiltered)", xlab=titv_label)

if (has_filtered) {
  hist(d.filtered$known_titv, nbreaks, col=color, xlim=xlim,
       main="Known Ti/Tv (filtered)", xlab=titv_label)
  hist(d.filtered$novel_titv, nbreaks, col=color, xlim=xlim,
       main="Novel Ti/Tv (filtered)", xlab=titv_label)
}

dev.off()

#
# Plot samples in order of novel Ti/Tv versus known Ti/Tv
#

outfile <- paste(base_name, ".novel_vs_known_titv.png", sep="")

png(outfile, width=600, height=600)

d.display <- d.display[order(d.display$novel_titv),]
sample_index <- seq_len(nrow(d.display))

plot(sample_index, d.display$known_titv, type="b", col="blue", ylim=c(0, 4),
     xlab="Sample #", ylab="Ti / Tv")
# The y range is fixed by the plot() call above; points() takes no ylim.
points(sample_index, d.display$novel_titv, type="b", col="red")
legend("bottomright", c("known", "novel"), col=c("blue", "red"), pch=21, lty=1)

dev.off()
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
package org.broadinstitute.sting.playground.analyzeconcordance;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.PathUtils;
|
||||
import org.broadinstitute.sting.playground.utils.ProcessUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Compares results of VariantEval across a population or a case/control group.
|
||||
*/
|
||||
public class AnalyzeConcordance extends CommandLineProgram {
|
||||
|
||||
@Argument(fullName = "group_name", shortName = "groupName", doc = "The name of the group which will be prefixed output files", required = false)
|
||||
private String baseName = "analyze_concordance";
|
||||
@Argument(fullName = "eval_list", shortName = "evalList", doc = "The input list of unfiltered eval files to analyze", required = true)
|
||||
private String evalListFile = null;
|
||||
@Argument(fullName = "filtered_eval_list", shortName = "filteredEvalList", doc = "The input list of filtered eval files to analyze", required = false)
|
||||
private String filteredEvalListFile = null;
|
||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||
private String outputDir = "analyzeConcordance";
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript", required = false)
|
||||
private String pathToRscript = "env Rscript";
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting analyze concordance R scripts", required = false)
|
||||
private String pathToResources = "R" + File.separator + "analyzeConcordance";
|
||||
|
||||
private enum EvalFilterType {
|
||||
UNFILTERED, FILTERED
|
||||
}
|
||||
|
||||
private static final AnalyzeConcordanceField[] ANALYZE_CONCORDANCE_FIELDS = AnalyzeConcordanceField.values();
|
||||
|
||||
private String evalDataFile;
|
||||
private List<String[]> data = new ArrayList<String[]>();
|
||||
|
||||
private static Logger logger = Logger.getLogger(AnalyzeConcordance.class);
|
||||
|
||||
protected int execute() {
|
||||
int result;
|
||||
|
||||
try {
|
||||
createOutputDirectory();
|
||||
|
||||
// initialize all the data from the csv file and allocate the list of covariates
|
||||
logger.info("Reading in input csv file...");
|
||||
initializeData();
|
||||
logger.info("...Done!");
|
||||
|
||||
// output data tables for Rscript to read in
|
||||
logger.info("Writing out intermediate tables for R...");
|
||||
writeDataTables();
|
||||
logger.info("...Done!");
|
||||
|
||||
// perform the analysis using Rscript and output the plots
|
||||
logger.info("Calling analysis R scripts and writing out figures...");
|
||||
result = callRScripts();
|
||||
logger.info("...Done!");
|
||||
|
||||
// perform the analysis using Rscript and output the plots
|
||||
logger.info("Generating html report...");
|
||||
generateHtmlReport();
|
||||
logger.info("...Done!");
|
||||
|
||||
} catch (StingException se) {
|
||||
throw se;
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Error analyzing concordance", e);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private void createOutputDirectory() {
|
||||
// create the output directory where all the data tables and plots will go
|
||||
File outputDir = new File(this.outputDir);
|
||||
if (!outputDir.exists() && !outputDir.mkdirs()) {
|
||||
throw new StingException("Couldn't create directory: " + this.outputDir);
|
||||
}
|
||||
}
|
||||
|
||||
private void initializeData() throws FileNotFoundException {
|
||||
// add the column headers to the data
|
||||
addHeader();
|
||||
|
||||
// read the list of unfiltered eval files
|
||||
addEvalListFile(EvalFilterType.UNFILTERED, new File(evalListFile));
|
||||
|
||||
// if provided, read the list of filtered eval files
|
||||
if (filteredEvalListFile != null) {
|
||||
addEvalListFile(EvalFilterType.FILTERED, new File(filteredEvalListFile));
|
||||
}
|
||||
}
|
||||
|
||||
private void addHeader() {
|
||||
String[] headers = new String[ANALYZE_CONCORDANCE_FIELDS.length + 2];
|
||||
int column = 0;
|
||||
headers[column++] = "eval_id";
|
||||
headers[column++] = "filter_type";
|
||||
|
||||
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
|
||||
headers[column++] = field.getColumnHeader();
|
||||
}
|
||||
|
||||
data.add(headers);
|
||||
}
|
||||
|
||||
private void addEvalListFile(EvalFilterType filterType, File evalListFile) throws FileNotFoundException {
|
||||
for (String line : new xReadLines(evalListFile)) {
|
||||
String[] parts = line.split("\t");
|
||||
addEvalFile(parts[0], filterType, new File(parts[1]));
|
||||
}
|
||||
}
|
||||
|
||||
private void addEvalFile(String evalID, EvalFilterType filterType, File evalFile) throws FileNotFoundException {
|
||||
SortedMap<AnalyzeConcordanceField, String> fieldValues = new TreeMap<AnalyzeConcordanceField, String>();
|
||||
|
||||
for (String line : new xReadLines(evalFile)) {
|
||||
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
|
||||
String value = field.parseLine(line);
|
||||
if (value != null) {
|
||||
fieldValues.put(field, value);
|
||||
break; // continue to the next line.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String[] values = new String[ANALYZE_CONCORDANCE_FIELDS.length + 2];
|
||||
int column = 0;
|
||||
values[column++] = evalID;
|
||||
values[column++] = filterType.toString().toLowerCase();
|
||||
|
||||
// get all the values, including null if for some reason a value wasn't found
|
||||
for (AnalyzeConcordanceField field : ANALYZE_CONCORDANCE_FIELDS) {
|
||||
values[column++] = fieldValues.get(field);
|
||||
}
|
||||
|
||||
data.add(values);
|
||||
}
|
||||
|
||||
private void writeDataTables() throws FileNotFoundException {
|
||||
evalDataFile = baseName + ".eval_data.tsv";
|
||||
// Create a PrintStream
|
||||
PrintStream output = new PrintStream(new File(outputDir, evalDataFile));
|
||||
for (String[] line : data) {
|
||||
output.println(Utils.join("\t", line));
|
||||
}
|
||||
output.close();
|
||||
}
|
||||
|
||||
private int callRScripts() {
|
||||
String command = pathToRscript + " "
|
||||
+ new File(pathToResources, "analyzeConcordance.R") + " "
|
||||
+ new File(outputDir, baseName) + " "
|
||||
+ new File(outputDir, evalDataFile);
|
||||
|
||||
return ProcessUtils.runCommandAndWait(command);
|
||||
}
|
||||
|
||||
private void generateHtmlReport() throws FileNotFoundException {
|
||||
// TODO: Enhance the reports
|
||||
PrintStream output = new PrintStream(new File(outputDir, "report.html"));
|
||||
output.println("<html><body>");
|
||||
for (File pngFile : new File(outputDir).listFiles(new PathUtils.ExtensionFilter("png"))) {
|
||||
output.println("<div><img src=\"" + pngFile.getName() + "\"/></div>");
|
||||
}
|
||||
output.println("</body></html>");
|
||||
output.close();
|
||||
}
|
||||
|
||||
public static void main(String[] argv) {
|
||||
try {
|
||||
AnalyzeConcordance instance = new AnalyzeConcordance();
|
||||
start(instance, argv);
|
||||
System.exit(CommandLineProgram.result);
|
||||
} catch (Exception e) {
|
||||
exitSystemWithError(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
package org.broadinstitute.sting.playground.analyzeconcordance;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
/**
 * The values extracted from an eval file, one constant per output column.
 *
 * <p>Each constant recognises lines of the form
 * <code>&lt;evalHeader&gt;  &lt;analysis&gt;  &lt;number&gt;...</code>, where the three parts are
 * separated by two or more spaces, and captures the number as text.
 *
 * User: kshakir
 * Date: Feb 11, 2010
 */
public enum AnalyzeConcordanceField {
    N_BASES_COVERED("all_bases", "all,summary,variant_counts", "n bases covered"),
    ALL_DBSNP_RATE("all_dbsnp", "all,summary,db_coverage", "dbsnp_rate"),
    ALL_VARIANT_COUNT("all_variants", "all,summary,variant_counts", "variants"),
    ALL_TITV_RATIO("all_titv", "all,summary,transitions_transversions", "ratio"),
    KNOWN_VARIANT_COUNT("known_variants", "known,summary,variant_counts", "variants"),
    KNOWN_TITV_RATIO("known_titv", "known,summary,transitions_transversions", "ratio"),
    NOVEL_VARIANT_COUNT("novel_variants", "novel,summary,variant_counts", "variants"),
    NOVEL_TITV_RATIO("novel_titv", "novel,summary,transitions_transversions", "ratio");

    /** Separator between the parts of a line: two or more spaces. */
    private static final String GAP = " {2,}";

    private final String columnHeader;
    private final Pattern pattern;

    /**
     * @param columnHeader name of the column this field is written to.
     * @param evalHeader   literal text that starts a matching line.
     * @param analysis     literal text of the second part of a matching line.
     */
    AnalyzeConcordanceField(String columnHeader, String evalHeader, String analysis) {
        this.columnHeader = columnHeader;

        // Quote the literal parts so that characters with a regex meaning are matched verbatim.
        String lineRegex = Pattern.quote(evalHeader) + GAP + Pattern.quote(analysis) + GAP + "([0-9.]+).*";
        this.pattern = Pattern.compile(lineRegex);
    }

    /**
     * @return the name of the column this field is written to.
     */
    public String getColumnHeader() {
        return this.columnHeader;
    }

    /**
     * Extracts this field's value from a line.
     *
     * @param line the line to inspect; may be null.
     * @return the run of digits and dots that follows the header and analysis text,
     *         or null if the line is null or does not match this field as a whole.
     */
    public String parseLine(String line) {
        if (line == null) {
            return null;
        }
        Matcher matcher = this.pattern.matcher(line);
        return matcher.matches() ? matcher.group(1) : null;
    }
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* A set of utilities for managing external processes.
|
||||
*/
|
||||
public class ProcessUtils {
|
||||
private static Logger logger = Logger.getLogger(ProcessUtils.class);
|
||||
|
||||
/**
|
||||
* Runs a command line and returns the result code.
|
||||
* @param command Command line to execute.
|
||||
* @return Result code of the command.
|
||||
*/
|
||||
public static int runCommandAndWait(String command) {
|
||||
try {
|
||||
logger.debug("Running command: " + command);
|
||||
|
||||
Process p = Runtime.getRuntime().exec(command);
|
||||
int result = p.waitFor();
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
for (String line : new xReadLines(p.getInputStream())) {
|
||||
logger.debug("command: " + line);
|
||||
}
|
||||
for (String line : new xReadLines(p.getErrorStream())) {
|
||||
logger.error("command: " + line);
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug("Command exited with result: " + result);
|
||||
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Error running command:" + command, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Currently built via:
|
||||
ant playground package -Dexecutable=AnalyzeConcordance
|
||||
-->
|
||||
<package name="AnalyzeConcordance">
|
||||
<executable name="AnalyzeConcordance">
|
||||
<main-class name="org.broadinstitute.sting.playground.analyzeconcordance.AnalyzeConcordance" />
|
||||
<resource-bundle file="StingText.properties" />
|
||||
<dependencies>
|
||||
<class name="org.broadinstitute.sting.playground.analyzeconcordance.AnalyzeConcordance" />
|
||||
</dependencies>
|
||||
</executable>
|
||||
<resources>
|
||||
<!-- Supplemental scripts for graph generation, etc. -->
|
||||
<file name="R/analyzeConcordance/analyzeConcordance.R" />
|
||||
</resources>
|
||||
</package>
|
||||
Loading…
Reference in New Issue