gatk-3.8/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/AnalyzeDepthOfCoverage.java

370 lines
16 KiB
Java

package org.broadinstitute.sting.oneoffprojects.firehosesummary;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import java.io.*;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.List;
/**
 * Command line program that condenses a Depth of Coverage output file into a short summary file
 * and launches the R plotting script on per-sample coverage tables.
 *
 * <p>The input is scanned for a per-locus and/or a per-target coverage section. For each section
 * found, per-column statistics are accumulated in {@link DepthStatisticsCalculator} objects, the
 * per-sample values are written to a temporary tab-delimited table, Rscript is launched on that
 * table, and a textual summary is produced. The summaries are finally written to the summary file.</p>
 *
 * @Author chartl
 * @Date Feb 18, 2010
 */
class AnalyzeDepthCLP extends CommandLineProgram {
    @Argument(fullName = "depthOfCoverageFile", shortName = "df", doc = "The Depth of Coverage output file", required = true)
    public File docFile = null;
    @Argument(fullName = "summaryFile", shortName = "sf", doc = "The summary file to which to output", required = true)
    public File summaryFile = null;
    @Argument(fullName = "plotBaseName", shortName = "bn", doc = "The base name for the plot files (e.g. 'foo' yields plots 'foo_DoC_by_sample.pdf'). Please ensure this name contains no spaces.", required = false)
    public String plotBaseName = "DepthAnalysis";
    @Argument(fullName = "pathToRScript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
    private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
    @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
    private String PATH_TO_RESOURCES = "./";

    // Set while reading the depth of coverage file; record which sections were found.
    private boolean containsByLocus = false;
    private boolean containsByTarget = false;

    ///////////////////////////////////////////////////////////////////////////////////
    // CONSTANT VALUES: SUMMARY STRING FOR NO INFORMATION, R-SCRIPT ARGUMENTS, ETC
    ///////////////////////////////////////////////////////////////////////////////////

    private static final String DEFAULT_SUMMARY_STRING = "No Summary Information";

    // Section header lines looked for in the depth of coverage file.
    private static final String PER_LOCUS_SECTION_HEADER = "PER_LOCUS_COVERAGE_SECTION";
    private static final String PER_TARGET_SECTION_HEADER = "PER_TARGET_COVERAGE_SECTION";

    // Column names that hold aggregate values rather than a single sample.
    private static final String TOTAL_COVERAGE_COLUMN = "total_coverage";
    private static final String COVERAGE_WITHOUT_DELETIONS_COLUMN = "coverage_without_deletions";

    // The R argument strings end with a plot name derived from plotBaseName. They are assembled
    // when the script is launched, not in a field initializer: initializers run when the object is
    // constructed, which is before the command line value of plotBaseName has been injected.
    private static final String PER_LOCUS_R_ARGUMENTS_PREFIX = "PlotInterleavedRows depth_of_coverage\\;proportion_of_bases_above\\;Per_Sample_Depth_of_Coverage\\;";
    private static final String PER_TARGET_R_ARGUMENTS_PREFIX = "PlotInterleavedRows depth_of_coverage\\;proportion_of_targets_with_mean_coverage_above\\;Per_Sample_Average_DoC_Over_Targets\\;";

    ///////////////////////////////////////////////////////////////////////////////////
    // ANALYSIS START: CALCULATE STATISTICS, WRITE IN R-READABLE FORMAT, MAKE PLOTS
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Reads the depth of coverage file, generates the plot tables and summaries for each section
     * that is present, and writes the final summary file.
     *
     * @return the program's result code
     */
    protected int execute() {
        List<DepthStatisticsCalculator> depthStats = calculateDepthStatistics(docFile);
        String perLocusSummary = DEFAULT_SUMMARY_STRING;
        String perTargetSummary = DEFAULT_SUMMARY_STRING;

        if ( containsByLocus ) {
            File baseSummaryTable = writeBaseSummaryFile(depthStats);
            perLocusSummary = generatePerLocusSummary(baseSummaryTable, depthStats);
        }

        if ( containsByTarget ) {
            File targetSummaryTable = writeTargetSummaryFile(depthStats);
            perTargetSummary = generatePerTargetSummary(targetSummaryTable, depthStats);
        }

        writeSummaryInfoFile(summaryFile, perLocusSummary, perTargetSummary);

        // NOTE(review): the value 1 is kept from the original implementation; confirm which
        // result code CommandLineProgram treats as success.
        return 1;
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // OPEN AND WRITE FINAL SUMMARY DOC FILE
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Writes the header comments and whichever section summaries are available to the summary file.
     *
     * @param sFile         file to write
     * @param locusSummary  per-locus summary text; written only when a per-locus section was read
     * @param targetSummary per-target summary text; written only when a per-target section was read
     * @throws StingException if the summary file cannot be opened
     */
    private void writeSummaryInfoFile(File sFile, String locusSummary, String targetSummary) {
        PrintWriter writer;
        try {
            writer = new PrintWriter(sFile);
        } catch (IOException e) {
            throw new StingException("Error writing final depth of coverage summary file",e);
        }

        try {
            writer.printf("%s%n","##Depth of coverage summary file");
            writer.printf("%s%n","##Well_Covered_Samples_By_Base - % of samples with >80% bases covered to 10x");
            writer.printf("%s%n","##Well_Covered_Samples_By_Mean - % of samples with mean coverage > 10x");
            writer.printf("%s%n%n","##Well_Covered_Samples_By_Target - % of samples with >80% targets covered to 10x");

            if ( containsByLocus ) {
                writer.printf("%s%n",locusSummary);
            }
            if ( containsByTarget ) {
                writer.printf("%s",targetSummary);
            }
        } finally {
            // release the file handle even if formatting fails
            writer.close();
        }
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // CALL R-SCRIPTS AND GENERATE OVERALL SUMMARY FILES
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Launches the per-locus plot script and builds the per-locus summary text.
     *
     * @param rReadablePlotFile table of per-sample values handed to the R script
     * @param calcs             one calculator per column of the depth of coverage file
     * @return the summary: average and standard deviation of total coverage, and the percentage of
     *         samples passing the per-base and per-mean coverage criteria
     */
    private String generatePerLocusSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
        launchRScript(rReadablePlotFile,
                      PER_LOCUS_R_ARGUMENTS_PREFIX + plotBaseName + "_per_locus",
                      "Error executing r command for per locus plot generation");

        // Samples are counted as they are seen instead of assuming that exactly two aggregate
        // columns are present in the file.
        int numSamples = 0;
        int numGoodSamples = 0;
        int numGoodSamplesByMeanCvg = 0;
        double totalAvgCoverage = -1;
        double totalStdevCoverage = -1;

        for ( DepthStatisticsCalculator calc : calcs ) {
            String name = calc.getName();
            if ( name.equalsIgnoreCase(TOTAL_COVERAGE_COLUMN) ) {
                totalAvgCoverage = calc.getMean();
                totalStdevCoverage = Math.sqrt(calc.getVar());
            } else if ( ! name.equalsIgnoreCase(COVERAGE_WITHOUT_DELETIONS_COLUMN) ) {
                numSamples++;
                if ( calc.getPercentWellCoveredLoci() > 0.8 ) {
                    numGoodSamples++;
                }
                if ( calc.getMean() > 10 ) {
                    numGoodSamplesByMeanCvg++;
                }
            }
        }

        StringBuilder summary = new StringBuilder();
        summary.append(String.format("%s%n","PER_LOCUS_SUMMARY"));
        summary.append(String.format("%s\t%f%n","Average_Coverage:",totalAvgCoverage));
        summary.append(String.format("%s\t%f%n","Stdev_Coverage:",totalStdevCoverage));
        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Base", percentage(numGoodSamples, numSamples)));
        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Mean", percentage(numGoodSamplesByMeanCvg, numSamples)));
        return summary.toString();
    }

    /**
     * Launches the per-target plot script and builds the per-target summary text.
     *
     * @param rReadablePlotFile table of per-sample values handed to the R script
     * @param calcs             one calculator per column of the depth of coverage file
     * @return the summary: the percentage of samples passing the per-target coverage criterion
     */
    private String generatePerTargetSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
        launchRScript(rReadablePlotFile,
                      PER_TARGET_R_ARGUMENTS_PREFIX + plotBaseName + "_per_target",
                      "Error executing r command for per target plot generation");

        int numSamples = 0;
        int numGoodSamples = 0;
        for ( DepthStatisticsCalculator calc : calcs ) {
            if ( isAggregateColumn(calc.getName()) ) {
                continue; // aggregate columns are not samples
            }
            numSamples++;
            if ( calc.getPercentWellCoveredTargets() > 0.8 ) {
                numGoodSamples++;
            }
        }

        StringBuilder summary = new StringBuilder();
        summary.append(String.format("%s%n","PER_TARGET_SUMMARY"));
        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Target", percentage(numGoodSamples, numSamples)));
        return summary.toString();
    }

    /**
     * Starts Rscript on a plot table. The process is started and not waited for.
     *
     * @param rReadablePlotFile table passed to the script
     * @param rArguments        remaining arguments appended to the command line
     * @param errorMessage      message of the exception raised if the process cannot be started
     * @throws StingException if the process cannot be started
     */
    private void launchRScript(File rReadablePlotFile, String rArguments, String errorMessage) {
        String rCommand = PATH_TO_RSCRIPT+" "+PATH_TO_RESOURCES+" "+rReadablePlotFile.getAbsolutePath()+" "+rArguments;
        try {
            Runtime.getRuntime().exec(rCommand);
        } catch ( IOException e ) {
            throw new StingException(errorMessage,e);
        }
    }

    /** Returns {@code count} as a percentage of {@code total}, or 0 when {@code total} is 0. */
    private static double percentage(int count, int total) {
        if ( total == 0 ) {
            return 0.0;
        }
        return ( (double) count * 100 ) / ( (double) total );
    }

    /** Returns true for the columns that hold aggregate coverage rather than a single sample. */
    private static boolean isAggregateColumn(String name) {
        return name.equalsIgnoreCase(TOTAL_COVERAGE_COLUMN) || name.equalsIgnoreCase(COVERAGE_WITHOUT_DELETIONS_COLUMN);
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // R-READABLE TEMPORARY FILE CREATION
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Writes the per-locus plot table: for every sample, one row of locus proportions followed by
     * one row of evaluation points.
     *
     * @param calcs one calculator per column of the depth of coverage file
     * @return the temporary file holding the table
     * @throws StingException if the temporary file cannot be created or opened
     */
    private File writeBaseSummaryFile(List<DepthStatisticsCalculator> calcs) {
        File perLocusSummaryFile = createTempTable(plotBaseName+"_per_locus_summary");
        PrintWriter locusWriter = openTableWriter(perLocusSummaryFile, "Locus summary temporary file was created but could not be opened.");
        try {
            for ( DepthStatisticsCalculator calc : calcs ) {
                if ( ! isAggregateColumn(calc.getName()) ) {
                    writeRow(locusWriter, calc.getName(), calc.getLocusProportions(), "%f");
                    writeRow(locusWriter, calc.getName(), calc.getEvalPoints(), "%d");
                }
            }
        } finally {
            locusWriter.close();
        }
        return perLocusSummaryFile;
    }

    /**
     * Writes the per-target plot table: for every sample, one row of target proportions followed by
     * one row of evaluation points.
     *
     * @param calcs one calculator per column of the depth of coverage file
     * @return the temporary file holding the table
     * @throws StingException if the temporary file cannot be created or opened
     */
    private File writeTargetSummaryFile(List<DepthStatisticsCalculator> calcs) {
        File perTargetSummaryFile = createTempTable(plotBaseName+"_per_target_summary");
        PrintWriter targetWriter = openTableWriter(perTargetSummaryFile, "Target summary temporary file was created but could not be opened.");
        try {
            for ( DepthStatisticsCalculator calc : calcs ) {
                if ( ! isAggregateColumn(calc.getName()) ) {
                    writeRow(targetWriter, calc.getName(), calc.getTargetProportions(), "%f");
                    writeRow(targetWriter, calc.getName(), calc.getEvalPoints(), "%d");
                }
            }
        } finally {
            targetWriter.close();
        }
        return perTargetSummaryFile;
    }

    /**
     * Creates an empty temporary ".txt" file. The file is not scheduled for deletion because the
     * R process that reads it is not waited for.
     */
    private File createTempTable(String prefix) {
        try {
            return File.createTempFile(prefix,".txt");
        } catch ( IOException e ) {
            throw new StingException("Could not create a temporary file. Please check the permissions of the directory you are running in, and that the base name is not a filepath.",e);
        }
    }

    /** Opens a writer on a plot table, raising a StingException with the given message on failure. */
    private PrintWriter openTableWriter(File table, String errorMessage) {
        try {
            return new PrintWriter(table);
        } catch ( IOException e ) {
            throw new StingException(errorMessage,e);
        }
    }

    /**
     * Writes one tab-delimited, newline-terminated row: the name followed by each value.
     *
     * <p>Passing the whole value container to printf as one argument cannot work: the formatter
     * sees a single object where it expects one argument per conversion. The values are therefore
     * formatted one at a time. The container's declared type lives outside this file, so an array
     * of any component type, an Iterable, or a single value is accepted.</p>
     *
     * @param writer      destination
     * @param name        first column of the row
     * @param values      the values to write after the name
     * @param valueFormat format conversion applied to every value, e.g. "%f" or "%d"
     */
    private void writeRow(PrintWriter writer, String name, Object values, String valueFormat) {
        StringBuilder row = new StringBuilder();
        row.append(name);
        if ( values != null && values.getClass().isArray() ) {
            int count = Array.getLength(values);
            for ( int i = 0; i < count; i++ ) {
                row.append('\t').append(String.format(valueFormat, Array.get(values, i)));
            }
        } else if ( values instanceof Iterable ) {
            for ( Object value : (Iterable<?>) values ) {
                row.append('\t').append(String.format(valueFormat, value));
            }
        } else if ( values != null ) {
            row.append('\t').append(String.format(valueFormat, values));
        }
        writer.printf("%s%n", row);
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // READING THE DEPTH OF COVERAGE FILE INTO CALCULATOR OBJECTS
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Reads the depth of coverage file and accumulates statistics for each column.
     *
     * <p>Sets {@code containsByLocus} and {@code containsByTarget} according to the sections found.
     * A per-locus section may be followed by a per-target section; a file may also start directly
     * with a per-target section.</p>
     *
     * @param docFile the depth of coverage file
     * @return one calculator per data column; empty if no coverage section was found
     * @throws StingException if the file cannot be opened or read
     */
    private List<DepthStatisticsCalculator> calculateDepthStatistics(File docFile) {
        BufferedReader docReader;
        try {
            docReader = new BufferedReader( new FileReader(docFile) );
        } catch ( IOException e) {
            throw new StingException("The file "+docFile.getAbsolutePath()+" could not be opened...",e);
        }

        try {
            containsByLocus = false;
            containsByTarget = false;

            String firstHeader = getDOCSectionHeader(docReader); // this will read to the first section header

            if ( PER_LOCUS_SECTION_HEADER.equalsIgnoreCase(firstHeader) ) {
                containsByLocus = true;
                List<DepthStatisticsCalculator> docCalculators = instantiateDOCCalculators(docReader);
                updateLocusInfo(docCalculators,docReader);

                String targetHeader = getDOCSectionHeader(docReader);
                if ( PER_TARGET_SECTION_HEADER.equalsIgnoreCase(targetHeader) ) {
                    containsByTarget = true;
                    // When the target section comes first, the line after its header is consumed
                    // as the column header; consume it here too so it is not parsed as data.
                    readColumnHeader(docReader);
                    updateTargetInfo(docCalculators,docReader);
                }
                return docCalculators;
            }

            if ( PER_TARGET_SECTION_HEADER.equalsIgnoreCase(firstHeader) ) {
                containsByTarget = true;
                List<DepthStatisticsCalculator> docCalculators = instantiateDOCCalculators(docReader);
                updateTargetInfo(docCalculators,docReader);
                return docCalculators;
            }

            return new ArrayList<DepthStatisticsCalculator>();
        } finally {
            try {
                docReader.close();
            } catch ( IOException ignored ) {
                // everything needed has already been read; a failure to close changes nothing
            }
        }
    }

    /**
     * Reads the column header line of a section and creates one calculator per column, skipping
     * the first column.
     *
     * @param reader reader positioned on the column header line
     * @return the calculators, in column order
     * @throws StingException if the line cannot be read or the file ends before it
     */
    private List<DepthStatisticsCalculator> instantiateDOCCalculators(BufferedReader reader) {
        String[] columns = readColumnHeader(reader).split("\t");
        List<DepthStatisticsCalculator> calcs = new ArrayList<DepthStatisticsCalculator>();
        for ( int col = 1; col < columns.length; col++ ) {
            calcs.add(new DepthStatisticsCalculator(columns[col]));
        }
        return calcs;
    }

    /** Reads the column header line of a section, failing if the file ends before it. */
    private String readColumnHeader(BufferedReader reader) {
        String header;
        try {
            header = reader.readLine();
        } catch (IOException e) {
            throw new StingException("Unable to read the section header",e);
        }
        if ( header == null ) {
            throw new StingException("Unable to read the section header",
                                     new EOFException("The depth of coverage file ended before the column header of a section"));
        }
        return header;
    }

    /**
     * Feeds every data line of the per-locus section to the calculators. The first column of each
     * line is skipped; column i+1 goes to calculator i.
     *
     * @param calcs  calculators in column order
     * @param reader reader positioned on the first data line of the section
     * @throws StingException if the file cannot be read
     */
    private void updateLocusInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
        try {
            String docLocLine = reader.readLine();
            while ( ! isEndOfSection(docLocLine) ) {
                String[] entries = docLocLine.split("\t");
                for ( int col = 1; col < entries.length; col++ ) {
                    calcs.get(col-1).updateLocus(Integer.parseInt(entries[col]));
                }
                docLocLine = reader.readLine(); // advance, otherwise the loop never terminates
            }
        } catch ( IOException e) {
            throw new StingException("Error reading locus depth of coverage information",e);
        }
    }

    /**
     * Feeds every data line of the per-target section to the calculators. The first column of each
     * line is the target interval, from which the target size is derived; column i+1 goes to
     * calculator i.
     *
     * @param calcs  calculators in column order
     * @param reader reader positioned on the first data line of the section
     * @throws StingException if the file cannot be read
     */
    private void updateTargetInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
        try {
            String docLocLine = reader.readLine();
            while ( ! isEndOfSection(docLocLine) ) {
                String[] entries = docLocLine.split("\t");
                if ( entries.length > 0 ) {
                    int targetSize = parseInterval(entries[0]);
                    for ( int col = 1; col < entries.length; col++ ) {
                        calcs.get(col-1).updateTargets(targetSize,Integer.parseInt(entries[col]));
                    }
                }
                docLocLine = reader.readLine(); // advance, otherwise the loop never terminates
            }
        } catch ( IOException e ) {
            throw new StingException("Error reading target depth of coverage information",e);
        }
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // FILE IO METHODS -- DEPEND ON DEPTH OF COVERAGE FILE FORMAT
    ///////////////////////////////////////////////////////////////////////////////////

    /** A section ends at an empty line or at the end of the file (null line). */
    private boolean isEndOfSection( String line ) {
        return line == null || line.length() == 0;
    }

    /**
     * Reads forward to the next section header line.
     *
     * @param reader the depth of coverage reader
     * @return the header line, or null if the end of the file is reached first
     * @throws StingException if the file cannot be read
     */
    private String getDOCSectionHeader(BufferedReader reader) {
        String header;
        try {
            do {
                header = reader.readLine();
                // null must be tested first: readLine returns null at the end of the file
            } while ( header != null && ! isDOCSectionSeparator(header) );
        } catch (IOException e) {
            throw new StingException("Error reading depth of coverage file",e);
        }
        return header;
    }

    /** Returns true if the line is a section header; a null line is not. */
    private boolean isDOCSectionSeparator( String line ) {
        return line != null && line.contains("_COVERAGE_SECTION");
    }

    /**
     * Derives a target size from an interval string of the form {@code contig:start-stop}.
     * An interval without a stop ({@code contig:start}) is treated as start == stop.
     *
     * @param interval the interval string
     * @return stop minus start
     */
    private int parseInterval(String interval) {
        String startstop = interval.split(":")[1];
        String[] bounds = startstop.split("-");
        int start = Integer.parseInt(bounds[0]);
        int stop = bounds.length > 1 ? Integer.parseInt(bounds[1]) : start;
        // NOTE(review): this is stop - start as in the original; if both ends are inclusive the
        // number of bases would be stop - start + 1 -- confirm against DepthStatisticsCalculator.
        return stop - start;
    }
}
///////////////////////////////////////////////////////////////////////////////////
// PROGRAM START -- THE MAIN() METHOD AND WRAPPER CLASS
///////////////////////////////////////////////////////////////////////////////////
/**
 * Wrapper class holding the program entry point: it creates the depth analysis command line
 * program, hands it to the command line framework together with the raw arguments, and exits.
 */
public class AnalyzeDepthOfCoverage {
    /**
     * Runs the depth of coverage analysis.
     *
     * @param args the command line arguments, passed through unchanged
     */
    public static void main(String[] args) {
        CommandLineProgram.start(new AnalyzeDepthCLP(), args);
        // the JVM always exits with status 0 once start() returns
        System.exit(0);
    }
}