Adding depth of coverage features to firehose summary tools

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2860 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-02-19 19:47:16 +00:00
parent 71f18e941f
commit 0e05a3acb0
3 changed files with 429 additions and 56 deletions

View File

@ -0,0 +1,369 @@
package org.broadinstitute.sting.oneoffprojects.firehosesummary;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
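/**
* Command-line program that reads a Depth of Coverage output file, computes per-sample depth
* statistics, writes R-readable tables for plotting, and writes a short text summary file.
*
* @Author chartl
* @Date Feb 18, 2010
*/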
class AnalyzeDepthCLP extends CommandLineProgram {
// Required input: the Depth of Coverage output file that will be parsed.
@Argument(fullName = "depthOfCoverageFile", shortName = "df", doc = "The Depth of Coverage output file", required = true)
public File docFile = null;
// Required output: the text summary file written at the end of execute().
@Argument(fullName = "summaryFile", shortName = "sf", doc = "The summary file to which to output", required = true)
public File summaryFile = null;
// Optional prefix used for temporary table names and for the plot names handed to R.
@Argument(fullName = "plotBaseName", shortName = "bn", doc = "The base name for the plot files (e.g. 'foo' yields plots 'foo_DoC_by_sample.pdf'). Please ensure this name contains no spaces.", required = false)
public String plotBaseName = "DepthAnalysis";
// Optional location of the Rscript executable; used as the first word of the R command line.
@Argument(fullName = "pathToRScript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
// Optional resources folder; used as the second word of the R command line.
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "./";
// Set by calculateDepthStatistics(): whether a per-locus / per-target section was found in the input.
private boolean containsByLocus = false;
private boolean containsByTarget = false;
///////////////////////////////////////////////////////////////////////////////////
// CONSTANT VALUES: SUMMARY STRING FOR NO INFORMATION, R-SCRIPT ARGUMENTS, ETC
///////////////////////////////////////////////////////////////////////////////////
// Placeholder summary text used when the corresponding section is absent from the input.
private final String DEFAULT_SUMMARY_STRING = "No Summary Information";
// NOTE(review): the two strings below are instance-field initializers, so they are evaluated when the
// object is constructed. main() constructs the object before handing the arguments to
// CommandLineProgram.start(), so they presumably always contain the default plotBaseName
// ("DepthAnalysis") rather than a value supplied with -bn -- confirm.
private final String PER_LOCUS_R_ARGUMENTS = "PlotInterleavedRows depth_of_coverage\\;proportion_of_bases_above\\;Per_Sample_Depth_of_Coverage\\;"+plotBaseName+"_per_locus";
private final String PER_TARGET_R_ARGUMENTS = "PlotInterleavedRows depth_of_coverage\\;proportion_of_targets_with_mean_coverage_above\\;Per_Sample_Average_DoC_Over_Targets\\;"+plotBaseName+"_per_target";
///////////////////////////////////////////////////////////////////////////////////
// ANALYSIS START: CALCULATE STATISTICS, WRITE IN R-READABLE FORMAT, MAKE PLOTS
///////////////////////////////////////////////////////////////////////////////////
/**
 * Entry point of the analysis: parses the depth of coverage file, produces the per-locus and
 * per-target summaries for whichever sections were present, and writes the final summary file.
 *
 * @return always 1
 */
protected int execute() {
    final List<DepthStatisticsCalculator> calculators = calculateDepthStatistics(docFile);

    // containsByLocus / containsByTarget are set as a side effect of the parse above.
    final String locusText = containsByLocus
            ? generatePerLocusSummary(writeBaseSummaryFile(calculators), calculators)
            : DEFAULT_SUMMARY_STRING;
    final String targetText = containsByTarget
            ? generatePerTargetSummary(writeTargetSumamryFile(calculators), calculators)
            : DEFAULT_SUMMARY_STRING;

    writeSummaryInfoFile(summaryFile, locusText, targetText);
    return 1;
}
///////////////////////////////////////////////////////////////////////////////////
// OPEN AND WRITE FINAL SUMMARY DOC FILE
///////////////////////////////////////////////////////////////////////////////////
/**
 * Writes the final depth of coverage summary: a fixed comment header followed by the per-locus
 * and/or per-target summary text, depending on which sections were found in the input.
 *
 * @param sFile         the file to (over)write
 * @param locusSummary  per-locus summary text; written only if a per-locus section was parsed
 * @param targetSummary per-target summary text; written only if a per-target section was parsed
 * @throws StingException if the file cannot be opened or a write error is detected
 */
private void writeSummaryInfoFile(File sFile, String locusSummary, String targetSummary) {
    PrintWriter writer;
    try {
        writer = new PrintWriter(sFile);
    } catch (IOException e) {
        throw new StingException("Error writing final depth of coverage summary file",e);
    }
    try {
        writer.printf("%s%n","##Depth of coverage summary file");
        writer.printf("%s%n","##Well_Covered_Samples_By_Base - % of samples with >80% bases covered to 10x");
        writer.printf("%s%n","##Well_Covered_Samples_By_Mean - % of samples with mean coverage > 10x");
        writer.printf("%s%n%n","##Well_Covered_Samples_By_Target - % of samples with >80% targets covered to 10x");
        if ( containsByLocus ) {
            writer.printf("%s%n",locusSummary);
        }
        if ( containsByTarget ) {
            writer.printf("%s",targetSummary);
        }
        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if ( writer.checkError() ) {
            throw new StingException("Error writing final depth of coverage summary file",
                    new IOException("Write error on "+sFile.getAbsolutePath()));
        }
    } finally {
        // Release the file handle even if formatting or the error check above throws.
        writer.close();
    }
}
///////////////////////////////////////////////////////////////////////////////////
// CALL R-SCRIPTS AND GENERATE OVERALL SUMMARY FILES
///////////////////////////////////////////////////////////////////////////////////
/**
 * Runs the per-locus plotting command on the given table and builds the PER_LOCUS_SUMMARY text:
 * mean and standard deviation of the "total_coverage" column, plus the percentage of sample
 * columns passing the by-base and by-mean coverage checks.
 *
 * @param rReadablePlotFile the R-readable per-locus table to plot
 * @param calcs             one calculator per column of the depth of coverage file
 * @return the multi-line per-locus summary
 * @throws StingException if the R command cannot be started or the wait is interrupted
 */
private String generatePerLocusSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
    // Assembled at call time: the PER_LOCUS_R_ARGUMENTS field is initialized when the object is
    // constructed and therefore does not reflect the plotBaseName in effect when this runs.
    String rArguments = "PlotInterleavedRows depth_of_coverage\\;proportion_of_bases_above\\;Per_Sample_Depth_of_Coverage\\;"+plotBaseName+"_per_locus";
    // NOTE(review): PATH_TO_RESOURCES is documented as a folder; no script file name is appended
    // here, so Rscript is handed a directory as its script argument -- confirm intended layout.
    String rCommand = PATH_TO_RSCRIPT+" "+PATH_TO_RESOURCES+" "+rReadablePlotFile.getAbsolutePath()+" "+rArguments;
    runRCommand(rCommand,"Error executing r command for per locus plot generation");

    // Samples are counted directly instead of assuming exactly two aggregate columns are present.
    int numSamples = 0;
    int numGoodSamples = 0;
    int numGoodSamplesByMeanCvg = 0;
    double totalAvgCoverage = -1;
    double totalStdevCoverage = -1;
    for ( DepthStatisticsCalculator calc : calcs ) {
        String name = calc.getName();
        if ( name.equalsIgnoreCase("total_coverage") ) {
            totalAvgCoverage = calc.getMean();
            totalStdevCoverage = Math.sqrt(calc.getVar());
        } else if ( ! name.equalsIgnoreCase("coverage_without_deletions") ) {
            numSamples++;
            if ( calc.getPercentWellCoveredLoci() > 0.8 ) {
                numGoodSamples++;
            }
            if ( calc.getMean() > 10 ) {
                numGoodSamplesByMeanCvg++;
            }
        }
    }

    StringBuilder summary = new StringBuilder();
    summary.append(String.format("%s%n","PER_LOCUS_SUMMARY"));
    summary.append(String.format("%s\t%f%n","Average_Coverage:",totalAvgCoverage));
    summary.append(String.format("%s\t%f%n","Stdev_Coverage:",totalStdevCoverage));
    summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Base",percentOf(numGoodSamples,numSamples)));
    summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Mean",percentOf(numGoodSamplesByMeanCvg,numSamples)));
    return summary.toString();
}

/**
 * Runs the per-target plotting command on the given table and builds the PER_TARGET_SUMMARY
 * text: the percentage of sample columns passing the by-target coverage check.
 *
 * @param rReadablePlotFile the R-readable per-target table to plot
 * @param calcs             one calculator per column of the depth of coverage file
 * @return the multi-line per-target summary
 * @throws StingException if the R command cannot be started or the wait is interrupted
 */
private String generatePerTargetSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
    // Assembled at call time for the same reason as in the per-locus variant.
    String rArguments = "PlotInterleavedRows depth_of_coverage\\;proportion_of_targets_with_mean_coverage_above\\;Per_Sample_Average_DoC_Over_Targets\\;"+plotBaseName+"_per_target";
    String rCommand = PATH_TO_RSCRIPT+" "+PATH_TO_RESOURCES+" "+rReadablePlotFile.getAbsolutePath()+" "+rArguments;
    runRCommand(rCommand,"Error executing r command for per target plot generation");

    int numSamples = 0;
    int numGoodSamples = 0;
    for ( DepthStatisticsCalculator calc : calcs ) {
        String name = calc.getName();
        // The two aggregate columns are not samples and are skipped.
        if ( name.equalsIgnoreCase("total_coverage") || name.equalsIgnoreCase("coverage_without_deletions") ) {
            continue;
        }
        numSamples++;
        if ( calc.getPercentWellCoveredTargets() > 0.8 ) {
            numGoodSamples++;
        }
    }

    StringBuilder summary = new StringBuilder();
    summary.append(String.format("%s%n","PER_TARGET_SUMMARY"));
    summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Target",percentOf(numGoodSamples,numSamples)));
    return summary.toString();
}

/**
 * Runs an external command and blocks until it has finished. The command is split on
 * whitespace, as Runtime.exec(String) does. Waiting matters because main() calls
 * System.exit(0) immediately after the program returns, which would otherwise end the JVM
 * while the child process may still be running.
 *
 * @param rCommand       the whitespace-separated command line
 * @param failureMessage message for the StingException thrown on failure
 */
private void runRCommand(String rCommand, String failureMessage) {
    try {
        ProcessBuilder builder = new ProcessBuilder(rCommand.trim().split("\\s+"));
        builder.redirectErrorStream(true);
        Process rProcess = builder.start();
        // The child gets no input from us; closing its stdin prevents it from waiting for any.
        rProcess.getOutputStream().close();
        // Drain the merged stdout/stderr so the child cannot block on a full pipe buffer.
        InputStream childOutput = rProcess.getInputStream();
        try {
            byte[] discard = new byte[4096];
            while ( childOutput.read(discard) != -1 ) {
                // output is intentionally discarded
            }
        } finally {
            childOutput.close();
        }
        int exitStatus = rProcess.waitFor();
        if ( exitStatus != 0 ) {
            // Plotting stays best-effort: the text summary is still produced.
            System.err.println("Warning: command exited with status "+exitStatus+": "+rCommand);
        }
    } catch ( IOException e ) {
        throw new StingException(failureMessage,e);
    } catch ( InterruptedException e ) {
        Thread.currentThread().interrupt();
        throw new StingException(failureMessage,e);
    }
}

/** Returns count as a percentage of total (NaN or infinite when total is 0, as with plain double division). */
private double percentOf(int count, int total) {
    return ( (double) count*100 )/( (double) total );
}
///////////////////////////////////////////////////////////////////////////////////
// R-READABLE TEMPORARY FILE CREATION
///////////////////////////////////////////////////////////////////////////////////
/**
 * Writes the per-locus table consumed by the plotting script: for every sample column, one row
 * with the sample name and its locus proportions, followed by one row with the sample name and
 * the depth cutoffs at which those proportions were evaluated. Fields are tab-separated.
 *
 * @param calcs one calculator per column of the depth of coverage file
 * @return the temporary file that was written
 * @throws StingException if the temporary file cannot be created, opened or written
 */
private File writeBaseSummaryFile(List<DepthStatisticsCalculator> calcs) {
    File perLocusSummaryFile;
    try {
        perLocusSummaryFile = File.createTempFile(plotBaseName+"_per_locus_summary",".txt");
    } catch ( IOException e ) {
        throw new StingException("Could not create a temporary file. Please check the permissions of the directory you are running in, and that the base name is not a filepath.",e);
    }
    PrintWriter locusWriter;
    try {
        locusWriter = new PrintWriter(perLocusSummaryFile);
    } catch ( IOException e ) {
        throw new StingException("Locus summary temporary file was created but could not be opened.",e);
    }
    try {
        for ( DepthStatisticsCalculator calc : calcs ) {
            if ( isSampleColumn(calc.getName()) ) {
                // Values are appended one by one: handing a primitive array to printf passes it
                // as a single argument, which a %f conversion rejects at run time.
                StringBuilder proportionRow = new StringBuilder(calc.getName());
                for ( double proportion : calc.getLocusProportions() ) {
                    proportionRow.append(String.format("\t%f",proportion));
                }
                locusWriter.printf("%s%n",proportionRow);
                locusWriter.printf("%s%n",evalPointRow(calc));
            }
        }
        if ( locusWriter.checkError() ) {
            throw new StingException("Error writing locus summary temporary file.",
                    new IOException("Write error on "+perLocusSummaryFile.getAbsolutePath()));
        }
    } finally {
        locusWriter.close();
    }
    return perLocusSummaryFile;
}

/**
 * Writes the per-target table consumed by the plotting script, in the same interleaved layout
 * as the per-locus table but using each sample's target proportions.
 *
 * @param calcs one calculator per column of the depth of coverage file
 * @return the temporary file that was written
 * @throws StingException if the temporary file cannot be created, opened or written
 */
private File writeTargetSumamryFile(List<DepthStatisticsCalculator> calcs) {
    File perTargetSummaryFile;
    try {
        perTargetSummaryFile = File.createTempFile(plotBaseName+"_per_target_summary",".txt");
    } catch ( IOException e ) {
        throw new StingException("Could not create a temporary file. Please check the permissions of the directory you are running in, and that the base name is not a filepath.",e);
    }
    PrintWriter targetWriter;
    try {
        targetWriter = new PrintWriter(perTargetSummaryFile);
    } catch ( IOException e ) {
        throw new StingException("Target summary temporary file was created but could not be opened.",e);
    }
    try {
        for ( DepthStatisticsCalculator calc : calcs ) {
            if ( isSampleColumn(calc.getName()) ) {
                StringBuilder proportionRow = new StringBuilder(calc.getName());
                for ( double proportion : calc.getTargetProportions() ) {
                    proportionRow.append(String.format("\t%f",proportion));
                }
                targetWriter.printf("%s%n",proportionRow);
                targetWriter.printf("%s%n",evalPointRow(calc));
            }
        }
        if ( targetWriter.checkError() ) {
            throw new StingException("Error writing target summary temporary file.",
                    new IOException("Write error on "+perTargetSummaryFile.getAbsolutePath()));
        }
    } finally {
        targetWriter.close();
    }
    return perTargetSummaryFile;
}

/** True for columns that represent a sample, i.e. neither of the two aggregate coverage columns. */
private boolean isSampleColumn(String columnName) {
    return ! columnName.equalsIgnoreCase("total_coverage") && ! columnName.equalsIgnoreCase("coverage_without_deletions");
}

/** Builds the tab-separated row holding the sample name followed by the depth cutoffs. */
private String evalPointRow(DepthStatisticsCalculator calc) {
    StringBuilder row = new StringBuilder(calc.getName());
    for ( int cutoff : calc.getEvalPoints() ) {
        row.append(String.format("\t%d",cutoff));
    }
    return row.toString();
}
///////////////////////////////////////////////////////////////////////////////////
// READING THE DEPTH OF COVERAGE FILE INTO CALCULATOR OBJECTS
///////////////////////////////////////////////////////////////////////////////////
/**
 * Parses the depth of coverage file into one calculator per data column and records, in
 * containsByLocus / containsByTarget, which sections were found. A per-locus section may be
 * followed by a per-target section; a file may also start directly with a per-target section.
 *
 * @param docFile the depth of coverage file to read
 * @return the populated calculators; an empty list if no recognized section header was found
 * @throws StingException if the file cannot be opened or read
 */
private List<DepthStatisticsCalculator> calculateDepthStatistics(File docFile) {
    BufferedReader docReader;
    try {
        docReader = new BufferedReader( new FileReader(docFile) );
    } catch ( IOException e) {
        throw new StingException("The file "+docFile.getAbsolutePath()+" could not be opened...",e);
    }
    try {
        containsByLocus = false;
        containsByTarget = false;
        List<DepthStatisticsCalculator> docCalculators = new ArrayList<DepthStatisticsCalculator>();

        String firstHeader = getDOCSectionHeader(docReader); // this will read to the first section header
        if ( firstHeader == null ) {
            return docCalculators;
        }
        if ( firstHeader.equalsIgnoreCase("PER_LOCUS_COVERAGE_SECTION") ) {
            containsByLocus = true;
            docCalculators = instantiateDOCCalculators(docReader);
            updateLocusInfo(docCalculators,docReader);
            String targetHeader = getDOCSectionHeader(docReader);
            if ( targetHeader != null && targetHeader.equalsIgnoreCase("PER_TARGET_COVERAGE_SECTION") ) {
                containsByTarget = true;
                // NOTE(review): unlike the target-only branch below, no column-header line is
                // consumed before the target rows are read here -- confirm against the file format.
                updateTargetInfo(docCalculators,docReader);
            }
        } else if ( firstHeader.equalsIgnoreCase("PER_TARGET_COVERAGE_SECTION") ) {
            containsByTarget = true;
            docCalculators = instantiateDOCCalculators(docReader);
            updateTargetInfo(docCalculators,docReader);
        }
        return docCalculators;
    } finally {
        try {
            docReader.close();
        } catch ( IOException ignored ) {
            // Nothing useful can be done if closing a read-only stream fails; parsing is over.
        }
    }
}
/**
 * Reads the tab-separated column header line and creates one calculator per column, skipping
 * the first column.
 *
 * @param reader reader positioned at the column header line
 * @return one calculator for each column after the first, in file order
 * @throws StingException if the line cannot be read or the file ends before it
 */
private List<DepthStatisticsCalculator> instantiateDOCCalculators(BufferedReader reader) {
    String header;
    try {
        header = reader.readLine();
    } catch (IOException e) {
        throw new StingException("Unable to read the section header",e);
    }
    if ( header == null ) {
        // readLine() returns null at end of file; fail with a clear message instead of an NPE.
        throw new StingException("Unable to read the section header",
                new EOFException("Depth of coverage file ended before the column header line"));
    }
    String[] columns = header.split("\t");
    List<DepthStatisticsCalculator> calcs = new ArrayList<DepthStatisticsCalculator>();
    for ( int column = 1; column < columns.length; column++ ) {
        calcs.add(new DepthStatisticsCalculator(columns[column]));
    }
    return calcs;
}
/**
 * Feeds every per-locus row into the calculators until the section ends (empty line) or the
 * file ends. The first field of a row is skipped; field i (i &gt;= 1) is parsed as an integer
 * depth and passed to calculator i-1.
 *
 * @param calcs  calculators in column order
 * @param reader reader positioned at the first data row of the per-locus section
 * @throws StingException if a line cannot be read
 */
private void updateLocusInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
    try {
        String docLocLine = reader.readLine();
        // null means end of file; it is checked here so a section without a trailing blank line ends cleanly.
        while ( docLocLine != null && ! isEndOfSection(docLocLine) ) {
            String[] fields = docLocLine.split("\t");
            for ( int column = 1; column < fields.length; column++ ) {
                calcs.get(column-1).updateLocus(Integer.parseInt(fields[column]));
            }
            // Advance to the next row; without this the loop would reprocess the same line forever.
            docLocLine = reader.readLine();
        }
    } catch ( IOException e) {
        throw new StingException("Error reading locus depth of coverage information",e);
    }
}
/**
 * Feeds every per-target row into the calculators until the section ends (empty line) or the
 * file ends. The first field of a row is the target interval, whose size is obtained from
 * parseInterval(); field i (i &gt;= 1) is parsed as an integer and passed, together with that
 * size, to calculator i-1.
 *
 * @param calcs  calculators in column order
 * @param reader reader positioned at the first data row of the per-target section
 * @throws StingException if a line cannot be read
 */
private void updateTargetInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
    try {
        String docLocLine = reader.readLine();
        // null means end of file; it is checked here so a section without a trailing blank line ends cleanly.
        while ( docLocLine != null && ! isEndOfSection(docLocLine) ) {
            String[] fields = docLocLine.split("\t");
            // A line consisting only of tabs splits into zero fields and is ignored.
            if ( fields.length > 0 ) {
                int targetSize = parseInterval(fields[0]);
                for ( int column = 1; column < fields.length; column++ ) {
                    calcs.get(column-1).updateTargets(targetSize,Integer.parseInt(fields[column]));
                }
            }
            // Advance to the next row; without this the loop would reprocess the same line forever.
            docLocLine = reader.readLine();
        }
    } catch ( IOException e ) {
        throw new StingException("Error reading target depth of coverage information",e);
    }
}
///////////////////////////////////////////////////////////////////////////////////
// FILE IO METHODS -- DEPEND ON DEPTH OF COVERAGE FILE FORMAT
///////////////////////////////////////////////////////////////////////////////////
/**
 * Tells whether a line read from the depth of coverage file terminates the current section.
 *
 * @param line the line just read, or null if the end of the file was reached
 * @return true for an empty line (the section delimiter) and for null (end of file)
 */
private boolean isEndOfSection( String line ) {
    // sections delimited by empty line; end of file also ends the last section
    return line == null || line.length() == 0;
}
/**
 * Advances the reader to the next section header line.
 *
 * @param reader the depth of coverage file reader
 * @return the first line read that is a section separator, or null if the file ends first
 * @throws StingException if a line cannot be read
 */
private String getDOCSectionHeader(BufferedReader reader) {
    String header;
    try {
        do {
            header = reader.readLine();
            // The null (end of file) test must come first so the separator check never sees null.
        } while ( header != null && ! isDOCSectionSeparator(header) );
    } catch (IOException e) {
        throw new StingException("Error reading depth of coverage file",e);
    }
    return header;
}
/**
 * Tells whether a line is a section header of the depth of coverage file.
 *
 * @param line the line to test; may be null (end of file)
 * @return true if the line contains "_COVERAGE_SECTION"; false otherwise, including for null
 */
private boolean isDOCSectionSeparator( String line ) {
    return line != null && line.contains("_COVERAGE_SECTION");
}
/**
 * Computes the size of an interval written as "contig:start-stop" or "contig:position".
 *
 * @param interval the interval text
 * @return stop - start; 0 for a single-position interval
 * @throws IllegalArgumentException if the text has no ':' part; NumberFormatException (a
 *         subclass) if a coordinate is not an integer
 */
private int parseInterval(String interval) {
    String[] contigAndRange = interval.split(":");
    if ( contigAndRange.length < 2 ) {
        throw new IllegalArgumentException("Malformed interval (expected contig:start-stop): "+interval);
    }
    String[] bounds = contigAndRange[1].split("-");
    int start = Integer.parseInt(bounds[0]);
    // A single-position interval has no "-stop" part; treat it as start == stop.
    int stop = bounds.length > 1 ? Integer.parseInt(bounds[1]) : start;
    // NOTE(review): if start and stop are both inclusive, the number of bases is stop - start + 1;
    // the existing stop - start convention is kept here -- confirm what updateTargets() expects.
    return stop - start;
}
}
///////////////////////////////////////////////////////////////////////////////////
// PROGRAM START -- THE MAIN() METHOD AND WRAPPER CLASS
///////////////////////////////////////////////////////////////////////////////////
/**
 * Wrapper class holding the main() method that launches the depth analysis program.
 */
public class AnalyzeDepthOfCoverage {
    /**
     * Creates the depth analysis program, hands it to CommandLineProgram.start() together with
     * the command-line arguments, and then exits the JVM with status 0.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        CommandLineProgram.start(new AnalyzeDepthCLP(), args);
        System.exit(0);
    }
}

View File

@ -25,7 +25,7 @@ public class DepthStatisticsCalculator extends SummaryStatisticsCalculator {
private int targetsAbove99x;
private int numTargets;
public static double[] DEPTH_CUTOFFS = {1,4,10,25,50,100};
public static int[] DEPTH_CUTOFFS = {1,4,10,25,50,100};
public DepthStatisticsCalculator(String name) {
super(name);
@ -125,4 +125,8 @@ public class DepthStatisticsCalculator extends SummaryStatisticsCalculator {
/**
 * Returns ten times the ratio of targetsAbove9x to numTargets.
 *
 * NOTE(review): the method name says "percent" but the scale factor is 10, not 100, and a
 * caller in AnalyzeDepthCLP compares the result against 0.8 -- confirm the intended scale.
 *
 * @return 10 * targetsAbove9x / numTargets, computed in floating point
 */
public double getPercentWellCoveredTargets() {
    final double wellCoveredTargets = (double) targetsAbove9x;
    final double allTargets = (double) numTargets;
    return 10*( wellCoveredTargets/allTargets );
}
/**
 * Returns the depth cutoffs shared by all calculators.
 *
 * @return the static DEPTH_CUTOFFS array itself (not a copy)
 */
public int[] getEvalPoints() {
    return DEPTH_CUTOFFS;
}
}

View File

@ -17,18 +17,18 @@ import java.util.*;
class FirehoseSummaryCLP extends CommandLineProgram {
@Argument(fullName = "depthOfCoverageFile", shortName = "doc", doc="Path to the depth of coverage file", required=true)
private File depthOfCoverage = null;
@Argument(fullName = "contaminationFile", shortName = "con", doc="Path to the contamination file", required=true)
private File contamination = null;
@Argument(fullName = "errorRateFile", shortName = "err", doc="Path to the error rate file", required=true)
private File errorRate = null;
@Argument(fullName = "zipFiles", shortName = "zip", doc="List of paths to zip files which contain summary metrics files", required=false)
private String zipFiles = null;
// @Argument(fullName = "contaminationFile", shortName = "con", doc="Path to the contamination file", required=true)
// private File contamination = null;
// @Argument(fullName = "errorRateFile", shortName = "err", doc="Path to the error rate file", required=true)
// private File errorRate = null;
// @Argument(fullName = "zipFiles", shortName = "zip", doc="List of paths to zip files which contain summary metrics files", required=false)
// private String zipFiles = null;
private static String R_SCRIPT = "plotFirehoseDataQCMetrics.R";
private static String SCRIPT_DOC_FLAG = "DOC";
protected int execute() {
SummaryFileCollection metricsFiles = getFileHandles();
// SummaryFileCollection metricsFiles = getFileHandles();
List<DepthStatisticsCalculator> depthStats = calculateDepthStatistics(depthOfCoverage);
String docSummary = makeDOCPlots(depthStats);
return 1;
@ -85,18 +85,18 @@ class FirehoseSummaryCLP extends CommandLineProgram {
return "temporary";
}
/**
 * Builds the collection of metrics files from the comma-separated zipFiles argument.
 *
 * @return a collection that has processed every listed zip file, or null if zipFiles is null
 */
private SummaryFileCollection getFileHandles() {
    SummaryFileCollection collected = null;
    if ( zipFiles != null ) {
        collected = new SummaryFileCollection();
        final String[] zipPaths = zipFiles.split(",");
        for ( int i = 0; i < zipPaths.length; i++ ) {
            collected.process(zipPaths[i]);
        }
    }
    return collected;
}
// private SummaryFileCollection getFileHandles() {
// if ( zipFiles == null ) {
// return null;
// }
//
// SummaryFileCollection summaryFiles = new SummaryFileCollection();
// for ( String zipFile : zipFiles.split(",") ) {
// summaryFiles.process(zipFile);
// }
//
// return summaryFiles;
// }
private List<DepthStatisticsCalculator> calculateDepthStatistics(File docFile) {
BufferedReader docReader;
@ -219,39 +219,39 @@ public class GenerateFirehoseSummary {
}
}
/**
 * Container for the per-sample metrics files that will be summarized. Each map is keyed by
 * sample name.
 */
class SummaryFileCollection {
    public Map<String,File> fingerprintSummaryFiles;
    public Map<String,File> hybridSelectionMetricsFiles;
    public Map<String,File> insertSizeDistributionFiles;
    public Map<String,File> alignmentMetricsFiles;

    /** Creates an empty collection. */
    public SummaryFileCollection() {
        fingerprintSummaryFiles = new HashMap<String,File>();
        hybridSelectionMetricsFiles = new HashMap<String, File>();
        insertSizeDistributionFiles = new HashMap<String,File>();
        alignmentMetricsFiles = new HashMap<String,File>();
    }

    /**
     * Unzips one sequencing-metrics archive and registers the four metrics files expected for
     * its sample. The sample name is the second "_"-separated token of the path text preceding
     * "_sequencing_metrics.zip".
     *
     * @param zipFilePath path to the zip file
     * @throws IllegalArgumentException if no sample name can be extracted from the path
     * @throws RuntimeException if unzip cannot be run, is interrupted, or reports an error
     */
    public void process(String zipFilePath) {
        String sampleName = sampleNameOf(zipFilePath);
        File fingerprintSummaryFile = new File(sampleName+".summary_fingerprint_metrics");
        File hybridSelectionFile = new File(sampleName+".hybrid_selection_metrics");
        File insertSizeFile = new File(sampleName+".insert_size_metrics");
        File alignmentFile = new File(sampleName+".alignment_metrics");

        // Extraction must have finished before the files are registered for later reading.
        unzip(zipFilePath);

        fingerprintSummaryFiles.put(sampleName,fingerprintSummaryFile);
        hybridSelectionMetricsFiles.put(sampleName,hybridSelectionFile);
        insertSizeDistributionFiles.put(sampleName,insertSizeFile);
        alignmentMetricsFiles.put(sampleName,alignmentFile);
    }

    /** Extracts the sample name from a zip path, rejecting paths that do not contain one. */
    private static String sampleNameOf(String zipFilePath) {
        String[] beforeSuffix = zipFilePath.split("_sequencing_metrics.zip");
        String[] nameParts = beforeSuffix.length == 0 ? new String[0] : beforeSuffix[0].split("_");
        if ( nameParts.length < 2 ) {
            throw new IllegalArgumentException("Cannot determine the sample name from zip file path: "+zipFilePath);
        }
        return nameParts[1];
    }

    /** Runs "unzip" on the given archive and waits for it to finish. */
    private static void unzip(String zipFilePath) {
        try {
            // Passing the path as its own argument keeps paths containing spaces intact.
            ProcessBuilder builder = new ProcessBuilder("unzip", zipFilePath);
            builder.redirectErrorStream(true);
            Process unzipProcess = builder.start();
            // No input is supplied; closing stdin keeps unzip from waiting on an overwrite prompt.
            unzipProcess.getOutputStream().close();
            // Drain the merged output so unzip cannot block on a full pipe buffer.
            java.io.InputStream childOutput = unzipProcess.getInputStream();
            try {
                byte[] discard = new byte[4096];
                while ( childOutput.read(discard) != -1 ) {
                    // output is intentionally discarded
                }
            } finally {
                childOutput.close();
            }
            int exitStatus = unzipProcess.waitFor();
            // unzip documents status 1 as "warnings, but processing completed successfully".
            if ( exitStatus > 1 ) {
                throw new RuntimeException("Could not unzip the file "+zipFilePath+" (unzip exited with status "+exitStatus+")");
            }
        } catch (IOException e) {
            throw new RuntimeException("Could not unzip the file "+zipFilePath, e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("Could not unzip the file "+zipFilePath, e);
        }
    }
}
//class SummaryFileCollection {
//
// // container class for files we'll be summarizing
//
// public Map<String,File> fingerprintSummaryFiles;
// public Map<String,File> hybridSelectionMetricsFiles;
// public Map<String,File> insertSizeDistributionFiles;
// public Map<String,File> alignmentMetricsFiles;
//
// public SummaryFileCollection() {
// fingerprintSummaryFiles = new HashMap<String,File>();
// hybridSelectionMetricsFiles = new HashMap<String, File>();
// insertSizeDistributionFiles = new HashMap<String,File>();
// alignmentMetricsFiles = new HashMap<String,File>();
// }
//
// public void process(String zipFilePath) {
// String sampleName = zipFilePath.split("_sequencing_metrics.zip")[0].split("_")[1];
// File fingerprintSummaryFile = new File(sampleName+".summary_fingerprint_metrics");
// File hybridSelectionFile = new File(sampleName+".hybrid_selection_metrics");
// File insertSizeFile = new File(sampleName+".insert_size_metrics");
// File alignmentFile = new File(sampleName+".alignment_metrics");
//
// String command = "unzip "+zipFilePath;
// try {
// Process p = Runtime.getRuntime().exec(command);
// } catch (IOException e) {
// throw new RuntimeException("Could not unzip the file "+zipFilePath);
// }
//
// fingerprintSummaryFiles.put(sampleName,fingerprintSummaryFile);
// hybridSelectionMetricsFiles.put(sampleName,hybridSelectionFile);
// insertSizeDistributionFiles.put(sampleName,insertSizeFile);
// alignmentMetricsFiles.put(sampleName,alignmentFile);
// }
//}