diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/AnalyzeDepthOfCoverage.java b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/AnalyzeDepthOfCoverage.java
new file mode 100644
index 000000000..aadcad894
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/AnalyzeDepthOfCoverage.java
@@ -0,0 +1,482 @@
+package org.broadinstitute.sting.oneoffprojects.firehosesummary;
+
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.cmdLine.Argument;
+import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Command line program that summarizes a Depth of Coverage output file.
+ *
+ * It reads the per-locus and/or per-target sections into one DepthStatisticsCalculator per
+ * column, writes R-readable temporary tables, launches Rscript on them, and writes a short
+ * text summary to the requested summary file.
+ *
+ * @Author chartl
+ * @Date Feb 18, 2010
+ */
+
+class AnalyzeDepthCLP extends CommandLineProgram {
+    @Argument(fullName = "depthOfCoverageFile", shortName = "df", doc = "The Depth of Coverage output file", required = true)
+    public File docFile = null;
+    @Argument(fullName = "summaryFile", shortName = "sf", doc = "The summary file to which to output", required = true)
+    public File summaryFile = null;
+    @Argument(fullName = "plotBaseName", shortName = "bn", doc = "The base name for the plot files (e.g. 'foo' yields plots 'foo_DoC_by_sample.pdf'). Please ensure this name contains no spaces.", required = false)
+    public String plotBaseName = "DepthAnalysis";
+    @Argument(fullName = "pathToRScript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
+    private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
+    @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
+    private String PATH_TO_RESOURCES = "./";
+
+    private boolean containsByLocus = false;
+    private boolean containsByTarget = false;
+
+    ///////////////////////////////////////////////////////////////////////////////////
+    // CONSTANT VALUES: SUMMARY STRING FOR NO INFORMATION, R-SCRIPT ARGUMENTS, ETC
+    ///////////////////////////////////////////////////////////////////////////////////
+
+    private static final String DEFAULT_SUMMARY_STRING = "No Summary Information";
+    // The plot base name is appended when the command is built rather than in a field
+    // initializer: main() constructs this object before handing it the command line, so an
+    // initializer would only ever see the default plotBaseName.
+    private static final String PER_LOCUS_R_ARGUMENT_PREFIX = "PlotInterleavedRows depth_of_coverage\\;proportion_of_bases_above\\;Per_Sample_Depth_of_Coverage\\;";
+    private static final String PER_TARGET_R_ARGUMENT_PREFIX = "PlotInterleavedRows depth_of_coverage\\;proportion_of_targets_with_mean_coverage_above\\;Per_Sample_Average_DoC_Over_Targets\\;";
+    private static final String LOCUS_SECTION_HEADER = "PER_LOCUS_COVERAGE_SECTION";
+    private static final String TARGET_SECTION_HEADER = "PER_TARGET_COVERAGE_SECTION";
+    private static final String TOTAL_COVERAGE_COLUMN = "total_coverage";
+    private static final String NO_DELETIONS_COLUMN = "coverage_without_deletions";
+
+    ///////////////////////////////////////////////////////////////////////////////////
+    // ANALYSIS START: CALCULATE STATISTICS, WRITE IN R-READABLE FORMAT, MAKE PLOTS
+    ///////////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Runs the analysis: parses the depth file, writes the plot tables for whichever
+     * sections were present, and writes the summary file.
+     *
+     * @return always 1
+     */
+    protected int execute() {
+        List<DepthStatisticsCalculator> depthStats = calculateDepthStatistics(docFile);
+        String perLocusSummary = DEFAULT_SUMMARY_STRING;
+        String perTargetSummary = DEFAULT_SUMMARY_STRING;
+
+        if ( containsByLocus ) {
+            File baseSummaryTable = writeBaseSummaryFile(depthStats);
+            perLocusSummary = generatePerLocusSummary(baseSummaryTable,depthStats);
+        }
+
+        if ( containsByTarget ) {
+            File targetSummaryTable = writeTargetSummaryFile(depthStats);
+            perTargetSummary = generatePerTargetSummary(targetSummaryTable, depthStats);
+        }
+
+        writeSummaryInfoFile(summaryFile,perLocusSummary,perTargetSummary);
+
+        return 1;
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////////
+    // OPEN AND WRITE FINAL SUMMARY DOC FILE
+    ///////////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Writes the header comments followed by the summaries of the sections that were found.
+     *
+     * @param sFile the summary file to create or overwrite
+     * @param locusSummary per-locus summary text, written only if a locus section was read
+     * @param targetSummary per-target summary text, written only if a target section was read
+     */
+    private void writeSummaryInfoFile(File sFile, String locusSummary, String targetSummary) {
+        PrintWriter writer = null;
+        try {
+            writer = new PrintWriter(sFile);
+            writer.printf("%s%n","##Depth of coverage summary file");
+            writer.printf("%s%n","##Well_Covered_Samples_By_Base - % of samples with >80% bases covered to 10x");
+            writer.printf("%s%n","##Well_Covered_Samples_By_Mean - % of samples with mean coverage > 10x");
+            writer.printf("%s%n%n","##Well_Covered_Samples_By_Target - % of samples with >80% targets covered to 10x");
+            if ( containsByLocus ) {
+                writer.printf("%s%n",locusSummary);
+            }
+            if ( containsByTarget ) {
+                writer.printf("%s",targetSummary);
+            }
+        } catch (IOException e) {
+            throw new StingException("Error writing final depth of coverage summary file",e);
+        } finally {
+            // close even when a write fails part-way so the file handle is not leaked
+            if ( writer != null ) {
+                writer.close();
+            }
+        }
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////////
+    // CALL R-SCRIPTS AND GENERATE OVERALL SUMMARY FILES
+    ///////////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Launches Rscript on the per-locus table and builds the per-locus summary text.
+     *
+     * @param rReadablePlotFile table produced by writeBaseSummaryFile
+     * @param calcs one calculator per column of the depth file
+     * @return the PER_LOCUS_SUMMARY block of the summary file
+     */
+    private String generatePerLocusSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
+        launchRScript(rReadablePlotFile, PER_LOCUS_R_ARGUMENT_PREFIX+plotBaseName+"_per_locus", "per locus");
+
+        int numSamples = 0;
+        int numGoodSamples = 0;
+        int numGoodSamplesByMeanCvg = 0;
+        double totalAvgCoverage = -1;
+        double totalStdevCoverage = -1;
+
+        for ( DepthStatisticsCalculator calc : calcs ) {
+            if ( calc.getName().equalsIgnoreCase(TOTAL_COVERAGE_COLUMN) ) {
+                totalAvgCoverage = calc.getMean();
+                totalStdevCoverage = Math.sqrt(calc.getVar());
+            } else if ( ! calc.getName().equalsIgnoreCase(NO_DELETIONS_COLUMN) ) {
+                // counted here instead of assuming exactly two aggregate columns exist
+                numSamples++;
+                if ( calc.getPercentWellCoveredLoci() > 0.8 ) {
+                    numGoodSamples++;
+                }
+                if ( calc.getMean() > 10 ) {
+                    numGoodSamplesByMeanCvg++;
+                }
+            }
+        }
+
+        StringBuilder summary = new StringBuilder();
+        summary.append(String.format("%s%n","PER_LOCUS_SUMMARY"));
+        summary.append(String.format("%s\t%f%n","Average_Coverage:",totalAvgCoverage));
+        summary.append(String.format("%s\t%f%n","Stdev_Coverage:",totalStdevCoverage));
+        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Base", ( (double) numGoodSamples*100 )/( (double) numSamples)));
+        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Mean", ( (double) numGoodSamplesByMeanCvg*100) / ( (double) numSamples )));
+
+        return summary.toString();
+    }
+
+    /**
+     * Launches Rscript on the per-target table and builds the per-target summary text.
+     *
+     * @param rReadablePlotFile table produced by writeTargetSummaryFile
+     * @param calcs one calculator per column of the depth file
+     * @return the PER_TARGET_SUMMARY block of the summary file
+     */
+    private String generatePerTargetSummary(File rReadablePlotFile, List<DepthStatisticsCalculator> calcs) {
+        launchRScript(rReadablePlotFile, PER_TARGET_R_ARGUMENT_PREFIX+plotBaseName+"_per_target", "per target");
+
+        int numSamples = 0;
+        int numGoodSamples = 0;
+
+        for ( DepthStatisticsCalculator calc : calcs ) {
+            if ( isAggregateColumn(calc.getName()) ) {
+                continue;
+            }
+            numSamples++;
+            // NOTE(review): DepthStatisticsCalculator.getPercentWellCoveredTargets() is written as
+            // 10 * targetsAbove9x / numTargets, so 0.8 here corresponds to 8% of targets, not
+            // the 80% promised in the summary header -- confirm the intended scale.
+            if ( calc.getPercentWellCoveredTargets() > 0.8 ) {
+                numGoodSamples++;
+            }
+        }
+
+        StringBuilder summary = new StringBuilder();
+        summary.append(String.format("%s%n","PER_TARGET_SUMMARY"));
+        summary.append(String.format("%s\t%.2f%n","%Well_Covered_Samples_By_Target", ( (double) numGoodSamples*100) / ( (double) numSamples )));
+
+        return summary.toString();
+    }
+
+    /**
+     * Starts Rscript without waiting for it to finish.
+     *
+     * NOTE(review): the command places PATH_TO_RESOURCES where Rscript would expect a script
+     * path, and is not run through a shell, so the "\;" sequences reach R verbatim -- confirm
+     * both against the R scripts.
+     *
+     * @param rReadablePlotFile table handed to the script
+     * @param rArguments remaining script arguments
+     * @param plotDescription which plot this is, used in the failure message
+     */
+    private void launchRScript(File rReadablePlotFile, String rArguments, String plotDescription) {
+        String rCommand = PATH_TO_RSCRIPT+" "+PATH_TO_RESOURCES+" "+rReadablePlotFile.getAbsolutePath()+" "+rArguments;
+        try {
+            Runtime.getRuntime().exec(rCommand);
+        } catch ( IOException e ) {
+            throw new StingException("Error executing r command for "+plotDescription+" plot generation",e);
+        }
+    }
+
+    /** @return true for the aggregate columns that are not individual samples */
+    private boolean isAggregateColumn(String columnName) {
+        return columnName.equalsIgnoreCase(TOTAL_COVERAGE_COLUMN) || columnName.equalsIgnoreCase(NO_DELETIONS_COLUMN);
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////////
+    // R-READABLE TEMPORARY FILE CREATION
+    ///////////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Writes the per-locus proportions of every sample column to a temporary table.
+     *
+     * @param calcs one calculator per column of the depth file
+     * @return the temporary table file
+     */
+    private File writeBaseSummaryFile(List<DepthStatisticsCalculator> calcs) {
+        return writeProportionTable(plotBaseName+"_per_locus_summary", "Locus summary temporary file was created but could not be opened.", calcs, false);
+    }
+
+    /**
+     * Writes the per-target proportions of every sample column to a temporary table.
+     *
+     * @param calcs one calculator per column of the depth file
+     * @return the temporary table file
+     */
+    private File writeTargetSummaryFile(List<DepthStatisticsCalculator> calcs) {
+        return writeProportionTable(plotBaseName+"_per_target_summary", "Target summary temporary file was created but could not be opened.", calcs, true);
+    }
+
+    /**
+     * Writes two tab-separated rows per sample: its proportions, then the depth cutoffs.
+     *
+     * Values are printed one at a time: an array handed to printf counts as a single
+     * argument, so a multi-specifier format would fail at runtime.
+     *
+     * @param tempFilePrefix prefix of the temporary file name
+     * @param openFailureMessage message used if the created file cannot be opened
+     * @param calcs one calculator per column of the depth file
+     * @param byTarget true to write target proportions, false for locus proportions
+     * @return the temporary table file
+     */
+    private File writeProportionTable(String tempFilePrefix, String openFailureMessage, List<DepthStatisticsCalculator> calcs, boolean byTarget) {
+        File tableFile;
+        try {
+            tableFile = File.createTempFile(tempFilePrefix,".txt");
+        } catch ( IOException e ) {
+            throw new StingException("Could not create a temporary file. Please check the permissions of the directory you are running in, and that the base name is not a filepath.",e);
+        }
+
+        PrintWriter tableWriter;
+        try {
+            tableWriter = new PrintWriter(tableFile);
+        } catch ( IOException e ) {
+            throw new StingException(openFailureMessage,e);
+        }
+
+        try {
+            for ( DepthStatisticsCalculator calc : calcs ) {
+                if ( isAggregateColumn(calc.getName()) ) {
+                    continue;
+                }
+                // NOTE(review): assumes both proportion getters return double[] -- confirm
+                double[] proportions = byTarget ? calc.getTargetProportions() : calc.getLocusProportions();
+                tableWriter.print(calc.getName());
+                for ( double proportion : proportions ) {
+                    tableWriter.printf("\t%f",proportion);
+                }
+                tableWriter.println();
+                tableWriter.print(calc.getName());
+                for ( int cutoff : calc.getEvalPoints() ) {
+                    tableWriter.printf("\t%d",cutoff);
+                }
+                tableWriter.println();
+            }
+        } finally {
+            tableWriter.close();
+        }
+
+        return tableFile;
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////////
+    // READING THE DEPTH OF COVERAGE FILE INTO CALCULATOR OBJECTS
+    ///////////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Parses the depth file and records which sections it contained.
+     *
+     * @param docFile the Depth of Coverage output file
+     * @return one calculator per data column; empty if no recognized section was found
+     */
+    private List<DepthStatisticsCalculator> calculateDepthStatistics(File docFile) {
+        BufferedReader docReader;
+
+        try {
+            docReader = new BufferedReader( new FileReader(docFile) );
+        } catch ( IOException e) {
+            throw new StingException("The file "+docFile.getAbsolutePath()+" could not be opened...",e);
+        }
+
+        containsByLocus = false;
+        containsByTarget = false;
+        List<DepthStatisticsCalculator> docCalculators = new ArrayList<DepthStatisticsCalculator>();
+
+        try {
+            String firstHeader = getDOCSectionHeader(docReader); // this will read to the first section header
+            if ( firstHeader == null ) {
+                return docCalculators;
+            }
+
+            if ( firstHeader.equalsIgnoreCase(LOCUS_SECTION_HEADER) ) {
+                containsByLocus = true;
+                docCalculators = instantiateDOCCalculators(docReader);
+                updateLocusInfo(docCalculators,docReader);
+                String targetHeader = getDOCSectionHeader(docReader);
+                if ( targetHeader != null && targetHeader.equalsIgnoreCase(TARGET_SECTION_HEADER) ) {
+                    containsByTarget = true;
+                    updateTargetInfo(docCalculators,docReader);
+                }
+            } else if ( firstHeader.equalsIgnoreCase(TARGET_SECTION_HEADER) ) {
+                containsByTarget = true;
+                docCalculators = instantiateDOCCalculators(docReader);
+                updateTargetInfo(docCalculators,docReader);
+            }
+        } finally {
+            try {
+                docReader.close();
+            } catch ( IOException ignored ) {
+                // the data has already been read; a failed close cannot affect the results
+            }
+        }
+
+        return docCalculators;
+    }
+
+    /**
+     * Reads the column header line and creates a calculator for every column after the first.
+     *
+     * @param reader reader positioned just after a section header
+     * @return the calculators, in column order; empty if the file ends here
+     */
+    private List<DepthStatisticsCalculator> instantiateDOCCalculators(BufferedReader reader) {
+        String header;
+        try {
+            header = reader.readLine();
+        } catch (IOException e) {
+            throw new StingException("Unable to read the section header",e);
+        }
+
+        List<DepthStatisticsCalculator> calcs = new ArrayList<DepthStatisticsCalculator>();
+        if ( header == null ) {
+            return calcs;
+        }
+
+        String[] columns = header.split("\t");
+        for ( int column = 1; column < columns.length; column++ ) {
+            calcs.add(new DepthStatisticsCalculator(columns[column]));
+        }
+
+        return calcs;
+    }
+
+    /**
+     * Feeds every row of a locus section to the calculators, one depth per column.
+     *
+     * @param calcs calculators in column order
+     * @param reader reader positioned at the first data row of the section
+     */
+    private void updateLocusInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
+        try {
+            // the next row is read on every pass; without that the loop can never terminate
+            for ( String docLocLine = reader.readLine(); ! isEndOfSection(docLocLine); docLocLine = reader.readLine() ) {
+                String[] entries = docLocLine.split("\t");
+                for ( int column = 1; column < entries.length; column++ ) {
+                    calcs.get(column-1).updateLocus(Integer.parseInt(entries[column]));
+                }
+            }
+        } catch ( IOException e) {
+            throw new StingException("Error reading locus depth of coverage information",e);
+        }
+    }
+
+    /**
+     * Feeds every row of a target section to the calculators; the first column is the interval.
+     *
+     * @param calcs calculators in column order
+     * @param reader reader positioned at the first data row of the section
+     */
+    private void updateTargetInfo(List<DepthStatisticsCalculator> calcs, BufferedReader reader) {
+        try {
+            for ( String docLocLine = reader.readLine(); ! isEndOfSection(docLocLine); docLocLine = reader.readLine() ) {
+                String[] entries = docLocLine.split("\t");
+                if ( entries.length == 0 ) {
+                    continue; // a row of nothing but tabs carries no interval
+                }
+                int targetSize = parseInterval(entries[0]);
+                for ( int column = 1; column < entries.length; column++ ) {
+                    calcs.get(column-1).updateTargets(targetSize,Integer.parseInt(entries[column]));
+                }
+            }
+        } catch ( IOException e ) {
+            throw new StingException("Error reading target depth of coverage information",e);
+        }
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////////
+    // FILE IO METHODS -- DEPEND ON DEPTH OF COVERAGE FILE FORMAT
+    ///////////////////////////////////////////////////////////////////////////////////
+
+    /** @return true at an empty line (the section delimiter) or at end of file */
+    private boolean isEndOfSection( String line ) {
+        return line == null || line.length() == 0;
+    }
+
+    /**
+     * Skips forward to the next section header.
+     *
+     * @param reader reader over the depth file
+     * @return the header line, or null if the file ends first
+     */
+    private String getDOCSectionHeader(BufferedReader reader) {
+        try {
+            String header = reader.readLine();
+            // the null test comes first so that reaching end of file ends the search
+            while ( header != null && ! isDOCSectionSeparator(header) ) {
+                header = reader.readLine();
+            }
+            return header;
+        } catch (IOException e) {
+            throw new StingException("Error reading depth of coverage file",e);
+        }
+    }
+
+    /** @return true if the line contains the section-header marker */
+    private boolean isDOCSectionSeparator( String line ) {
+        return line != null && line.contains("_COVERAGE_SECTION");
+    }
+
+    /**
+     * Computes stop - start for an interval written as contig:start-stop.
+     *
+     * NOTE(review): if intervals are inclusive the size is one larger than this -- confirm.
+     *
+     * @param interval interval string; contig:position is treated as start == stop
+     * @return stop minus start
+     */
+    private int parseInterval(String interval) {
+        String[] bounds = interval.split(":")[1].split("-");
+        int start = Integer.parseInt(bounds[0]);
+        int stop = bounds.length > 1 ? Integer.parseInt(bounds[1]) : start;
+        return stop - start;
+    }
+
+}
+
+///////////////////////////////////////////////////////////////////////////////////
+// PROGRAM START -- THE MAIN() METHOD AND WRAPPER CLASS
+///////////////////////////////////////////////////////////////////////////////////
+
+public class AnalyzeDepthOfCoverage {
+
+    public static void main(String[] args) {
+        AnalyzeDepthCLP depthAnalysis = new AnalyzeDepthCLP();
+        CommandLineProgram.start(depthAnalysis,args);
+        System.exit(0);
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/DepthStatisticsCalculator.java 
b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/DepthStatisticsCalculator.java index f268c6f46..097af88ce 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/DepthStatisticsCalculator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/DepthStatisticsCalculator.java @@ -25,7 +25,7 @@ public class DepthStatisticsCalculator extends SummaryStatisticsCalculator { private int targetsAbove99x; private int numTargets; - public static double[] DEPTH_CUTOFFS = {1,4,10,25,50,100}; + public static int[] DEPTH_CUTOFFS = {1,4,10,25,50,100}; public DepthStatisticsCalculator(String name) { super(name); @@ -125,4 +125,8 @@ public class DepthStatisticsCalculator extends SummaryStatisticsCalculator { public double getPercentWellCoveredTargets() { return 10*( (double) targetsAbove9x )/( (double) numTargets ); } + + public int[] getEvalPoints() { + return DepthStatisticsCalculator.DEPTH_CUTOFFS; + } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/GenerateFirehoseSummary.java b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/GenerateFirehoseSummary.java index af9896123..1ae1d6ff3 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/GenerateFirehoseSummary.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/GenerateFirehoseSummary.java @@ -17,18 +17,18 @@ import java.util.*; class FirehoseSummaryCLP extends CommandLineProgram { @Argument(fullName = "depthOfCoverageFile", shortName = "doc", doc="Path to the depth of coverage file", required=true) private File depthOfCoverage = null; - @Argument(fullName = "contaminationFile", shortName = "con", doc="Path to the contamination file", required=true) - private File contamination = null; - @Argument(fullName = "errorRateFile", shortName = "err", doc="Path to the error rate file", required=true) - private File errorRate = null; - @Argument(fullName = "zipFiles", shortName = 
"zip", doc="List of paths to zip files which contain summary metrics files", required=false) - private String zipFiles = null; +// @Argument(fullName = "contaminationFile", shortName = "con", doc="Path to the contamination file", required=true) +// private File contamination = null; +// @Argument(fullName = "errorRateFile", shortName = "err", doc="Path to the error rate file", required=true) +// private File errorRate = null; +// @Argument(fullName = "zipFiles", shortName = "zip", doc="List of paths to zip files which contain summary metrics files", required=false) +// private String zipFiles = null; private static String R_SCRIPT = "plotFirehoseDataQCMetrics.R"; private static String SCRIPT_DOC_FLAG = "DOC"; protected int execute() { - SummaryFileCollection metricsFiles = getFileHandles(); +// SummaryFileCollection metricsFiles = getFileHandles(); List depthStats = calculateDepthStatistics(depthOfCoverage); String docSummary = makeDOCPlots(depthStats); return 1; @@ -85,18 +85,18 @@ class FirehoseSummaryCLP extends CommandLineProgram { return "temporary"; } - private SummaryFileCollection getFileHandles() { - if ( zipFiles == null ) { - return null; - } - - SummaryFileCollection summaryFiles = new SummaryFileCollection(); - for ( String zipFile : zipFiles.split(",") ) { - summaryFiles.process(zipFile); - } - - return summaryFiles; - } +// private SummaryFileCollection getFileHandles() { +// if ( zipFiles == null ) { +// return null; +// } +// +// SummaryFileCollection summaryFiles = new SummaryFileCollection(); +// for ( String zipFile : zipFiles.split(",") ) { +// summaryFiles.process(zipFile); +// } +// +// return summaryFiles; +// } private List calculateDepthStatistics(File docFile) { BufferedReader docReader; @@ -219,39 +219,39 @@ public class GenerateFirehoseSummary { } } -class SummaryFileCollection { - - // container class for files we'll be summarizing - - public Map fingerprintSummaryFiles; - public Map hybridSelectionMetricsFiles; - public Map 
insertSizeDistributionFiles; - public Map alignmentMetricsFiles; - - public SummaryFileCollection() { - fingerprintSummaryFiles = new HashMap(); - hybridSelectionMetricsFiles = new HashMap(); - insertSizeDistributionFiles = new HashMap(); - alignmentMetricsFiles = new HashMap(); - } - - public void process(String zipFilePath) { - String sampleName = zipFilePath.split("_sequencing_metrics.zip")[0].split("_")[1]; - File fingerprintSummaryFile = new File(sampleName+".summary_fingerprint_metrics"); - File hybridSelectionFile = new File(sampleName+".hybrid_selection_metrics"); - File insertSizeFile = new File(sampleName+".insert_size_metrics"); - File alignmentFile = new File(sampleName+".alignment_metrics"); - - String command = "unzip "+zipFilePath; - try { - Process p = Runtime.getRuntime().exec(command); - } catch (IOException e) { - throw new RuntimeException("Could not unzip the file "+zipFilePath); - } - - fingerprintSummaryFiles.put(sampleName,fingerprintSummaryFile); - hybridSelectionMetricsFiles.put(sampleName,hybridSelectionFile); - insertSizeDistributionFiles.put(sampleName,insertSizeFile); - alignmentMetricsFiles.put(sampleName,alignmentFile); - } -} \ No newline at end of file +//class SummaryFileCollection { +// +// // container class for files we'll be summarizing +// +// public Map fingerprintSummaryFiles; +// public Map hybridSelectionMetricsFiles; +// public Map insertSizeDistributionFiles; +// public Map alignmentMetricsFiles; +// +// public SummaryFileCollection() { +// fingerprintSummaryFiles = new HashMap(); +// hybridSelectionMetricsFiles = new HashMap(); +// insertSizeDistributionFiles = new HashMap(); +// alignmentMetricsFiles = new HashMap(); +// } +// +// public void process(String zipFilePath) { +// String sampleName = zipFilePath.split("_sequencing_metrics.zip")[0].split("_")[1]; +// File fingerprintSummaryFile = new File(sampleName+".summary_fingerprint_metrics"); +// File hybridSelectionFile = new 
File(sampleName+".hybrid_selection_metrics"); +// File insertSizeFile = new File(sampleName+".insert_size_metrics"); +// File alignmentFile = new File(sampleName+".alignment_metrics"); +// +// String command = "unzip "+zipFilePath; +// try { +// Process p = Runtime.getRuntime().exec(command); +// } catch (IOException e) { +// throw new RuntimeException("Could not unzip the file "+zipFilePath); +// } +// +// fingerprintSummaryFiles.put(sampleName,fingerprintSummaryFile); +// hybridSelectionMetricsFiles.put(sampleName,hybridSelectionFile); +// insertSizeDistributionFiles.put(sampleName,insertSizeFile); +// alignmentMetricsFiles.put(sampleName,alignmentFile); +// } +//} \ No newline at end of file