package org.broadinstitute.sting.oneoffprojects.firehosesummary;

import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line program that reads a Depth of Coverage output file, feeds each
 * per-sample column into a {@link DepthStatisticsCalculator}, writes tab-delimited
 * temporary tables, launches Rscript on those tables, and finally writes a short
 * text summary (average coverage, stdev, and percentage of well-covered samples).
 *
 * The input file is expected to contain a PER_LOCUS_COVERAGE_SECTION and/or a
 * PER_TARGET_COVERAGE_SECTION; each section header is followed by a tab-delimited
 * column-header row, then data rows, and is terminated by an empty line.
 *
 * @author chartl (Feb 18, 2010)
 */
class AnalyzeDepthCLP extends CommandLineProgram {
    @Argument(fullName = "depthOfCoverageFile", shortName = "df", doc = "The Depth of Coverage output file", required = true)
    public File docFile = null;
    @Argument(fullName = "summaryFile", shortName = "sf", doc = "The summary file to which to output", required = true)
    public File summaryFile = null;
    @Argument(fullName = "plotBaseName", shortName = "bn", doc = "The base name for the plot files (e.g. 'foo' yields plots 'foo_DoC_by_sample.pdf'). Please ensure this name contains no spaces.", required = false)
    public String plotBaseName = "DepthAnalysis";
    @Argument(fullName = "pathToRScript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
    private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
    @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
    private String PATH_TO_RESOURCES = "./";

    /** Set while parsing: true when the input file contained a per-locus section. */
    private boolean containsByLocus = false;
    /** Set while parsing: true when the input file contained a per-target section. */
    private boolean containsByTarget = false;

    ///////////////////////////////////////////////////////////////////////////////////
    // CONSTANT VALUES: SUMMARY STRING FOR NO INFORMATION, R-SCRIPT ARGUMENTS, ETC
    ///////////////////////////////////////////////////////////////////////////////////

    private static final String DEFAULT_SUMMARY_STRING = "No Summary Information";
    private static final String LOCUS_SECTION_HEADER = "PER_LOCUS_COVERAGE_SECTION";
    private static final String TARGET_SECTION_HEADER = "PER_TARGET_COVERAGE_SECTION";
    private static final String TOTAL_COVERAGE_COLUMN = "total_coverage";
    private static final String NO_DELETIONS_COLUMN = "coverage_without_deletions";

    // The R argument strings embed plotBaseName, so they are built on demand rather than
    // stored in final fields: field initializers run at construction time, before the
    // command-line arguments have been assigned, and would always see the default name.
    private String perLocusRArguments() {
        return "PlotInterleavedRows depth_of_coverage\\;proportion_of_bases_above\\;Per_Sample_Depth_of_Coverage\\;"+plotBaseName+"_per_locus";
    }

    private String perTargetRArguments() {
        return "PlotInterleavedRows depth_of_coverage\\;proportion_of_targets_with_mean_coverage_above\\;Per_Sample_Average_DoC_Over_Targets\\;"+plotBaseName+"_per_target";
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // ANALYSIS START: CALCULATE STATISTICS, WRITE IN R-READABLE FORMAT, MAKE PLOTS
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Runs the full analysis: parse the depth file, emit R-readable tables, launch the
     * plotting scripts, and write the summary file.
     *
     * @return the program result code
     */
    protected int execute() {
        List<DepthStatisticsCalculator> depthStats = calculateDepthStatistics(docFile);
        String perLocusSummary = DEFAULT_SUMMARY_STRING;
        String perTargetSummary = DEFAULT_SUMMARY_STRING;

        if ( containsByLocus ) {
            File baseSummaryTable = writeBaseSummaryFile(depthStats);
            perLocusSummary = generatePerLocusSummary(baseSummaryTable,depthStats);
        }

        if ( containsByTarget ) {
            File targetSummaryTable = writeTargetSummaryFile(depthStats);
            perTargetSummary = generatePerTargetSummary(targetSummaryTable, depthStats);
        }

        writeSummaryInfoFile(summaryFile,perLocusSummary,perTargetSummary);

        // NOTE(review): 1 is kept from the original; confirm whether CommandLineProgram
        // expects 0 for success.
        return 1;
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // OPEN AND WRITE FINAL SUMMARY DOC FILE
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Writes the final summary file: a fixed comment header followed by whichever of the
     * per-locus / per-target summaries were computed.
     *
     * @param sFile         destination file
     * @param locusSummary  per-locus summary text (written only if a locus section was seen)
     * @param targetSummary per-target summary text (written only if a target section was seen)
     * @throws StingException if the file cannot be opened for writing
     */
    private void writeSummaryInfoFile(File sFile, String locusSummary, String targetSummary) {
        PrintWriter writer;
        try {
            writer = new PrintWriter(sFile);
        } catch (IOException e) {
            throw new StingException("Error writing final depth of coverage summary file",e);
        }

        try {
            writer.printf("%s%n","##Depth of coverage summary file");
            writer.printf("%s%n","##Well_Covered_Samples_By_Base - % of samples with >80% bases covered to 10x");
            writer.printf("%s%n","##Well_Covered_Samples_By_Mean - % of samples with mean coverage > 10x");
            writer.printf("%s%n%n","##Well_Covered_Samples_By_Target - % of samples with >80% targets covered to 10x");

            if ( containsByLocus ) {
                writer.printf("%s%n",locusSummary);
            }
            if ( containsByTarget ) {
                writer.printf("%s",targetSummary);
            }
        } finally {
            // always release the file handle, even if formatting fails
            writer.close();
        }
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // CALL R-SCRIPTS AND GENERATE OVERALL SUMMARY FILES
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Launches the per-locus plotting command and builds the per-locus summary text.
     *
     * @param rReadablePlotFile table previously written by writeBaseSummaryFile
     * @param calcs             calculators, one per column of the depth file
     * @return multi-line summary beginning with PER_LOCUS_SUMMARY
     */
    private String generatePerLocusSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
        launchRScript(rReadablePlotFile, perLocusRArguments(), "per locus");

        int numSamples = 0;
        int numGoodSamples = 0;
        int numGoodSamplesByMeanCvg = 0;
        double totalAvgCoverage = -1;
        double totalStdevCoverage = -1;

        for ( DepthStatisticsCalculator calc : calcs ) {
            String name = calc.getName();
            if ( name.equalsIgnoreCase(TOTAL_COVERAGE_COLUMN) ) {
                totalAvgCoverage = calc.getMean();
                totalStdevCoverage = Math.sqrt(calc.getVar());
            } else if ( ! name.equalsIgnoreCase(NO_DELETIONS_COLUMN) ) {
                // count real sample columns directly instead of assuming exactly two
                // aggregate columns are present
                numSamples++;
                if ( calc.getPercentWellCoveredLoci() > 0.8 ) {
                    numGoodSamples++;
                }
                if ( calc.getMean() > 10 ) {
                    numGoodSamplesByMeanCvg++;
                }
            }
        }

        StringBuilder summary = new StringBuilder();
        summary.append(String.format("%s%n","PER_LOCUS_SUMMARY"));
        summary.append(String.format("%s\t%f%n","Average_Coverage:",totalAvgCoverage));
        summary.append(String.format("%s\t%f%n","Stdev_Coverage:",totalStdevCoverage));
        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Base", percentOf(numGoodSamples,numSamples)));
        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Mean", percentOf(numGoodSamplesByMeanCvg,numSamples)));
        return summary.toString();
    }

    /**
     * Launches the per-target plotting command and builds the per-target summary text.
     *
     * @param rReadablePlotFile table previously written by writeTargetSummaryFile
     * @param calcs             calculators, one per column of the depth file
     * @return multi-line summary beginning with PER_TARGET_SUMMARY
     */
    private String generatePerTargetSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
        launchRScript(rReadablePlotFile, perTargetRArguments(), "per target");

        int numSamples = 0;
        int numGoodSamples = 0;

        for ( DepthStatisticsCalculator calc : calcs ) {
            if ( isAggregateColumn(calc.getName()) ) {
                continue; // only individual samples contribute to this statistic
            }
            numSamples++;
            if ( calc.getPercentWellCoveredTargets() > 0.8 ) {
                numGoodSamples++;
            }
        }

        StringBuilder summary = new StringBuilder();
        summary.append(String.format("%s%n","PER_TARGET_SUMMARY"));
        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Target", percentOf(numGoodSamples,numSamples)));
        return summary.toString();
    }

    /**
     * Starts Rscript on the given table without waiting for it to finish.
     *
     * NOTE(review): the command is "Rscript, resources path, table, arguments" exactly as
     * in the original; no script file name is appended to the resources path here, and
     * the escaped semicolons are passed literally because no shell is involved. Confirm
     * this matches what the R side expects.
     *
     * @param rReadablePlotFile table to hand to R
     * @param rArguments        trailing arguments for the command line
     * @param plotDescription   "per locus" or "per target", used in the error message
     * @throws StingException if the process cannot be started
     */
    private void launchRScript(File rReadablePlotFile, String rArguments, String plotDescription) {
        String rCommand = PATH_TO_RSCRIPT+" "+PATH_TO_RESOURCES+" "+rReadablePlotFile.getAbsolutePath()+" "+rArguments;
        try {
            Runtime.getRuntime().exec(rCommand);
        } catch ( IOException e ) {
            throw new StingException("Error executing r command for "+plotDescription+" plot generation",e);
        }
    }

    /** @return true for the two non-sample columns (total coverage, coverage without deletions). */
    private static boolean isAggregateColumn(String name) {
        return name.equalsIgnoreCase(TOTAL_COVERAGE_COLUMN) || name.equalsIgnoreCase(NO_DELETIONS_COLUMN);
    }

    /** @return 100 * part / whole as a double (NaN when whole is 0). */
    private static double percentOf(int part, int whole) {
        return ( (double) part*100 ) / ( (double) whole );
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // R-READABLE TEMPORARY FILE CREATION
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Writes the per-locus table: for every sample, one row of locus proportions
     * followed by one row of evaluation points.
     *
     * @param calcs calculators, one per column of the depth file
     * @return the temporary file that was written
     */
    private File writeBaseSummaryFile(List<DepthStatisticsCalculator> calcs) {
        File perLocusSummaryFile = createTempTable(plotBaseName+"_per_locus_summary");
        PrintWriter locusWriter = openTempTable(perLocusSummaryFile,"Locus summary temporary file was created but could not be opened.");
        try {
            for ( DepthStatisticsCalculator calc : calcs ) {
                if ( ! isAggregateColumn(calc.getName()) ) {
                    writeRow(locusWriter, calc.getName(), calc.getLocusProportions(), "%f");
                    writeRow(locusWriter, calc.getName(), calc.getEvalPoints(), "%d");
                }
            }
        } finally {
            locusWriter.close();
        }
        return perLocusSummaryFile;
    }

    /**
     * Writes the per-target table: for every sample, one row of target proportions
     * followed by one row of evaluation points.
     *
     * @param calcs calculators, one per column of the depth file
     * @return the temporary file that was written
     */
    private File writeTargetSummaryFile(List<DepthStatisticsCalculator> calcs) {
        File perTargetSummaryFile = createTempTable(plotBaseName+"_per_target_summary");
        PrintWriter targetWriter = openTempTable(perTargetSummaryFile,"Target summary temporary file was created but could not be opened.");
        try {
            for ( DepthStatisticsCalculator calc : calcs ) {
                if ( ! isAggregateColumn(calc.getName()) ) {
                    writeRow(targetWriter, calc.getName(), calc.getTargetProportions(), "%f");
                    writeRow(targetWriter, calc.getName(), calc.getEvalPoints(), "%d");
                }
            }
        } finally {
            targetWriter.close();
        }
        return perTargetSummaryFile;
    }

    /** Creates a ".txt" temporary file with the given prefix. The file is intentionally not
     *  deleted here because the R process that reads it is started without being awaited. */
    private File createTempTable(String prefix) {
        try {
            return File.createTempFile(prefix,".txt");
        } catch ( IOException e ) {
            throw new StingException("Could not create a temporary file. Please check the permissions of the directory you are running in, and that the base name is not a filepath.",e);
        }
    }

    /** Opens a writer on a temporary table, translating failure into a StingException. */
    private PrintWriter openTempTable(File table, String failureMessage) {
        try {
            return new PrintWriter(table);
        } catch ( IOException e ) {
            throw new StingException(failureMessage,e);
        }
    }

    /**
     * Writes one tab-delimited, newline-terminated row: the row name followed by each value.
     *
     * The original code passed the whole value collection as a single printf argument
     * against a six-specifier format, which cannot be formatted, and emitted no line
     * terminator. The values are expanded element by element here. The parameter is typed
     * as Object because the calculator's getter return types are not visible from this
     * file; arrays (primitive or boxed) and Iterables are both handled.
     *
     * @param out           destination writer
     * @param rowName       first column of the row
     * @param values        array or Iterable of numbers
     * @param elementFormat format specifier applied to each element ("%f" or "%d")
     */
    private static void writeRow(PrintWriter out, String rowName, Object values, String elementFormat) {
        StringBuilder row = new StringBuilder(rowName);
        if ( values != null && values.getClass().isArray() ) {
            int length = java.lang.reflect.Array.getLength(values);
            for ( int i = 0; i < length; i++ ) {
                row.append('\t').append(String.format(elementFormat, java.lang.reflect.Array.get(values,i)));
            }
        } else if ( values instanceof Iterable ) {
            for ( Object value : (Iterable<?>) values ) {
                row.append('\t').append(String.format(elementFormat, value));
            }
        } else if ( values != null ) {
            row.append('\t').append(String.format(elementFormat, values));
        }
        out.printf("%s%n", row);
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // READING THE DEPTH OF COVERAGE FILE INTO CALCULATOR OBJECTS
    ///////////////////////////////////////////////////////////////////////////////////

    /**
     * Parses the depth of coverage file and records which sections it contained in
     * containsByLocus / containsByTarget.
     *
     * @param docFile the depth of coverage file
     * @return one calculator per data column; empty if no recognised section was found
     * @throws StingException if the file cannot be opened or read
     */
    private List<DepthStatisticsCalculator> calculateDepthStatistics(File docFile) {
        BufferedReader docReader;
        try {
            docReader = new BufferedReader( new FileReader(docFile) );
        } catch ( IOException e) {
            throw new StingException("The file "+docFile.getAbsolutePath()+" could not be opened...",e);
        }

        try {
            return readSections(docReader);
        } finally {
            try {
                docReader.close();
            } catch ( IOException ignored ) {
                // nothing useful can be done if closing a read-only stream fails
            }
        }
    }

    /** Reads the locus and/or target sections from an open reader. */
    private List<DepthStatisticsCalculator> readSections(BufferedReader docReader) {
        containsByLocus = false;
        containsByTarget = false;

        String firstHeader = getDOCSectionHeader(docReader); // this will read to the first section header
        if ( firstHeader == null ) {
            return new ArrayList<DepthStatisticsCalculator>();
        }

        if ( firstHeader.equalsIgnoreCase(LOCUS_SECTION_HEADER) ) {
            containsByLocus = true;
            List<DepthStatisticsCalculator> docCalculators = instantiateDOCCalculators(docReader);
            updateLocusInfo(docCalculators,docReader);

            String targetHeader = getDOCSectionHeader(docReader);
            if ( targetHeader != null && targetHeader.equalsIgnoreCase(TARGET_SECTION_HEADER) ) {
                containsByTarget = true;
                // The target-only branch below consumes a column-header row right after the
                // section header, so the same row must be skipped here before data rows
                // are parsed; the calculators themselves already exist.
                readColumnHeader(docReader);
                updateTargetInfo(docCalculators,docReader);
            }
            return docCalculators;
        }

        if ( firstHeader.equalsIgnoreCase(TARGET_SECTION_HEADER) ) {
            containsByTarget = true;
            List<DepthStatisticsCalculator> docCalculators = instantiateDOCCalculators(docReader);
            updateTargetInfo(docCalculators,docReader);
            return docCalculators;
        }

        return new ArrayList<DepthStatisticsCalculator>();
    }

    /**
     * Reads the column-header row of a section and creates one calculator per column,
     * skipping the first (row label) column.
     */
    private List<DepthStatisticsCalculator> instantiateDOCCalculators(BufferedReader reader) {
        String[] columns = readColumnHeader(reader).split("\t");
        List<DepthStatisticsCalculator> calcs = new ArrayList<DepthStatisticsCalculator>();
        for ( int i = 1; i < columns.length; i++ ) {
            calcs.add(new DepthStatisticsCalculator(columns[i]));
        }
        return calcs;
    }

    /** Reads the column-header row, failing with a StingException on I/O error or EOF. */
    private String readColumnHeader(BufferedReader reader) {
        String header;
        try {
            header = reader.readLine();
        } catch (IOException e) {
            throw new StingException("Unable to read the section header",e);
        }
        if ( header == null ) {
            throw new StingException("Unable to read the section header",
                    new EOFException("File ended immediately after a section header"));
        }
        return header;
    }

    /**
     * Feeds every data row of the per-locus section into the calculators. Column i+1 of
     * each row goes to calcs.get(i); the first column is ignored.
     */
    private void updateLocusInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
        try {
            // advance the reader on every iteration; the section ends at a blank line or EOF
            for ( String docLocLine = reader.readLine(); ! isEndOfSection(docLocLine); docLocLine = reader.readLine() ) {
                String[] entries = docLocLine.split("\t");
                for ( int i = 1; i < entries.length; i++ ) {
                    calcs.get(i-1).updateLocus(Integer.parseInt(entries[i]));
                }
            }
        } catch ( IOException e) {
            throw new StingException("Error reading locus depth of coverage information",e);
        }
    }

    /**
     * Feeds every data row of the per-target section into the calculators. The first
     * column is an interval from which the target size is derived; column i+1 goes to
     * calcs.get(i).
     */
    private void updateTargetInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
        try {
            // advance the reader on every iteration; the section ends at a blank line or EOF
            for ( String docLocLine = reader.readLine(); ! isEndOfSection(docLocLine); docLocLine = reader.readLine() ) {
                String[] entries = docLocLine.split("\t");
                int targetSize = parseInterval(entries[0]);
                for ( int i = 1; i < entries.length; i++ ) {
                    calcs.get(i-1).updateTargets(targetSize,Integer.parseInt(entries[i]));
                }
            }
        } catch ( IOException e ) {
            throw new StingException("Error reading target depth of coverage information",e);
        }
    }

    ///////////////////////////////////////////////////////////////////////////////////
    // FILE IO METHODS -- DEPEND ON DEPTH OF COVERAGE FILE FORMAT
    ///////////////////////////////////////////////////////////////////////////////////

    /** Sections are delimited by an empty line; end of file (null) also ends a section. */
    private boolean isEndOfSection( String line ) {
        return line == null || line.length() == 0;
    }

    /**
     * Reads forward until a line containing "_COVERAGE_SECTION" is found.
     *
     * @return that line, or null if the end of the file is reached first
     */
    private String getDOCSectionHeader(BufferedReader reader) {
        String header;
        try {
            do {
                header = reader.readLine();
            } while ( header != null && ! isDOCSectionSeparator(header) ); // null check must come first
        } catch (IOException e) {
            throw new StingException("Error reading depth of coverage file",e);
        }
        return header;
    }

    private boolean isDOCSectionSeparator( String line ) {
        return line != null && line.contains("_COVERAGE_SECTION");
    }

    /**
     * Parses an interval of the form "contig:start-stop" and returns stop - start. A
     * single-position interval "contig:pos" yields 0.
     *
     * NOTE(review): if intervals are inclusive the size would be stop - start + 1; the
     * original arithmetic is preserved here. Confirm against updateTargets.
     *
     * @throws StingException if the interval is malformed
     */
    private int parseInterval(String interval) {
        try {
            String startstop = interval.split(":")[1];
            String[] bounds = startstop.split("-");
            int start = Integer.parseInt(bounds[0]);
            int stop = bounds.length > 1 ? Integer.parseInt(bounds[1]) : start;
            return stop - start;
        } catch ( NumberFormatException e ) {
            throw new StingException("Malformed target interval: "+interval,e);
        } catch ( ArrayIndexOutOfBoundsException e ) {
            throw new StingException("Malformed target interval: "+interval,e);
        }
    }
}

///////////////////////////////////////////////////////////////////////////////////
// PROGRAM START -- THE MAIN() METHOD AND WRAPPER CLASS
///////////////////////////////////////////////////////////////////////////////////

/** Entry point: hands the command line to {@link AnalyzeDepthCLP} via CommandLineProgram.start. */
public class AnalyzeDepthOfCoverage {

    public static void main(String[] args) {
        AnalyzeDepthCLP depthAnalysis = new AnalyzeDepthCLP();
        CommandLineProgram.start(depthAnalysis,args);
        // NOTE(review): exits 0 unconditionally, as in the original; the value returned by
        // execute() is not consulted here.
        System.exit(0);
    }
}