Initial commit of integration test(s) for CoverageStatistics, currently in progress [midway commit is for Matt]
Modifications to CoverageStatistics - now includes and extends much of the behavior of DepthOfCoverage (per-base output, per-target output). Additional functionality (coverage without deletions, base counts, by read group instead of by sample) is upcoming. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2888 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
553d39bb00
commit
3d92e5a737
|
|
@ -8,6 +8,8 @@ import org.broadinstitute.sting.gatk.walkers.By;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
@ -15,7 +17,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
@ -27,13 +28,21 @@ import java.util.Set;
|
||||||
* distribution of % bases and % targets covered for certain depths. The granularity of DOC can be set by command
|
* distribution of % bases and % targets covered for certain depths. The granularity of DOC can be set by command
|
||||||
* line arguments.
|
* line arguments.
|
||||||
*
|
*
|
||||||
* // todo -- alter logarithmic scaling to spread out bins more
|
|
||||||
* // todo -- allow for user to set linear binning (default is logarithmic)
|
|
||||||
* // todo -- add per target (e.g. regional) aggregation
|
|
||||||
*
|
*
|
||||||
* @Author chartl
|
* @Author chartl
|
||||||
* @Date Feb 22, 2010
|
* @Date Feb 22, 2010
|
||||||
*/
|
*/
|
||||||
|
// todo [DONE] -- add per target (e.g. regional) aggregation
|
||||||
|
// todo [DONE] -- add ability to print out the calculated bins and quit (for pre-analysis bin size selection)
|
||||||
|
// todo -- refactor the location of the ALL_SAMPLE metrics [keep out of the per-sample HashMaps]
|
||||||
|
// todo [DONE] -- per locus output through -o
|
||||||
|
// todo -- support for using read groups instead of samples
|
||||||
|
// todo -- coverage without deletions
|
||||||
|
// todo -- base counts
|
||||||
|
// todo -- support for aggregate (ignoring sample IDs) granular histograms; maybe n*[start,stop], bins*sqrt(n)
|
||||||
|
// todo -- alter logarithmic scaling to spread out bins more
|
||||||
|
// todo -- allow for user to set linear binning (default is logarithmic)
|
||||||
|
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOfCoverageStats> implements TreeReducible<DepthOfCoverageStats> {
|
public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOfCoverageStats> implements TreeReducible<DepthOfCoverageStats> {
|
||||||
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
|
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
|
||||||
|
|
@ -46,17 +55,42 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
byte minMappingQuality = 50;
|
byte minMappingQuality = 50;
|
||||||
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to 20.", required = false)
|
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to 20.", required = false)
|
||||||
byte minBaseQuality = 20;
|
byte minBaseQuality = 20;
|
||||||
@Argument(fullName = "perLocusStatisticsFile", shortName = "locusFile", doc = "File to output per-locus statistics to; if unprovided these will not be calculated", required = false)
|
@Argument(fullName = "omitLocusTable", shortName = "omitLocus", doc = "Will not calculate the per-sample per-depth counts of loci, which should result in speedup", required = false)
|
||||||
File perLocusStatisticsFile = null;
|
boolean omitLocusTable = false;
|
||||||
@Argument(fullName = "perSampleStatisticsFile", shortName = "sampleFile", doc = "File to output per-sample statistics to; if unprovided will go to standard (-o) output", required = false)
|
@Argument(fullName = "omitIntervalStatistics", shortName = "omitIntervals", doc = "Will omit the per-interval statistics section, which should result in speedup", required = false)
|
||||||
File perSampleStatisticsFile = null;
|
boolean omitIntervals = false;
|
||||||
@Argument(fullName = "summaryStatisticsFile", shortName = "summaryFile", doc = "File to output summary (mean, median) statistics to; if unprovided will go to standard (-o) output", required = false)
|
@Argument(fullName = "omitDepthOutputAtEachBase", shortName = "omitBaseOutput", doc = "Will omit the output of the depth of coverage at each base, which should result in speedup", required = false)
|
||||||
File summaryStatisticsFile = null;
|
boolean omitDepthOutput = false;
|
||||||
|
@Argument(fullName = "printBinEndpointsAndExit", doc = "Prints the bin values and exits immediately. Use to calibrate what bins you want before running on data.", required = false)
|
||||||
|
boolean printBinEndpointsAndExit = false;
|
||||||
|
@Argument(fullName = "omitPerSampleStats", shortName = "omitSampleSummary", doc = "Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime.", required = false)
|
||||||
|
boolean omitSampleSummary = false;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
// STANDARD WALKER METHODS
|
// STANDARD WALKER METHODS
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
|
||||||
|
if ( printBinEndpointsAndExit ) {
|
||||||
|
int[] endpoints = DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins);
|
||||||
|
System.out.print("[ ");
|
||||||
|
for ( int e : endpoints ) {
|
||||||
|
System.out.print(e+" ");
|
||||||
|
}
|
||||||
|
System.out.println("]");
|
||||||
|
System.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( getToolkit().getArguments().outFileName == null ) {
|
||||||
|
throw new StingException("This walker requires that you specify an output file (-o)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isReduceByInterval() {
|
||||||
|
return ( ! omitIntervals );
|
||||||
|
}
|
||||||
|
|
||||||
public DepthOfCoverageStats reduceInit() {
|
public DepthOfCoverageStats reduceInit() {
|
||||||
List<Set<String>> samplesByReaders = getToolkit().getSamplesByReaders();
|
List<Set<String>> samplesByReaders = getToolkit().getSamplesByReaders();
|
||||||
DepthOfCoverageStats stats = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins));
|
DepthOfCoverageStats stats = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins));
|
||||||
|
|
@ -67,10 +101,20 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( perLocusStatisticsFile != null ) {
|
if ( ! omitLocusTable ) {
|
||||||
stats.initializeLocusCounts();
|
stats.initializeLocusCounts();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( ! omitDepthOutput ) { // print header
|
||||||
|
out.printf("%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
||||||
|
for ( String s : stats.getAllSamples()) {
|
||||||
|
out.printf("\t%s_%s","Depth_for",s);
|
||||||
|
}
|
||||||
|
out.printf("%n");
|
||||||
|
} else {
|
||||||
|
out.printf("Per-Locus Depth of Coverage output was omitted");
|
||||||
|
}
|
||||||
|
|
||||||
return stats;
|
return stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -91,11 +135,20 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
depthBySample.put(sample,properDepth);
|
depthBySample.put(sample,properDepth);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( ! omitDepthOutput ) {
|
||||||
|
out.printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives)
|
||||||
|
}
|
||||||
|
|
||||||
return depthBySample;
|
return depthBySample;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DepthOfCoverageStats reduce(Map<String,Integer> thisMap, DepthOfCoverageStats prevReduce) {
|
public DepthOfCoverageStats reduce(Map<String,Integer> thisMap, DepthOfCoverageStats prevReduce) {
|
||||||
prevReduce.update(thisMap);
|
prevReduce.update(thisMap);
|
||||||
|
if ( ! omitDepthOutput ) {
|
||||||
|
printDepths(out,thisMap, prevReduce.getAllSamples());
|
||||||
|
// this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without
|
||||||
|
// turning on omit
|
||||||
|
}
|
||||||
return prevReduce;
|
return prevReduce;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -104,10 +157,185 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
return left;
|
return left;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// INTERVAL ON TRAVERSAL DONE
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
public void onTraversalDone( List<Pair<GenomeLoc,DepthOfCoverageStats>> statsByInterval ) {
|
||||||
|
File intervalStatisticsFile = deriveFromStream("interval_statistics");
|
||||||
|
File intervalSummaryFile = deriveFromStream("interval_summary");
|
||||||
|
DepthOfCoverageStats mergedStats = printIntervalStatsAndMerge(statsByInterval,intervalSummaryFile, intervalStatisticsFile);
|
||||||
|
this.onTraversalDone(mergedStats);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DepthOfCoverageStats printIntervalStatsAndMerge(List<Pair<GenomeLoc,DepthOfCoverageStats>> statsByInterval, File summaryFile, File statsFile) {
|
||||||
|
PrintStream summaryOut;
|
||||||
|
PrintStream statsOut;
|
||||||
|
|
||||||
|
try {
|
||||||
|
summaryOut = summaryFile == null ? out : new PrintStream(summaryFile);
|
||||||
|
statsOut = statsFile == null ? out : new PrintStream(statsFile);
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
throw new StingException("Unable to open interval file on reduce", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Pair<GenomeLoc,DepthOfCoverageStats> firstPair = statsByInterval.remove(0);
|
||||||
|
DepthOfCoverageStats firstStats = firstPair.second;
|
||||||
|
|
||||||
|
StringBuilder summaryHeader = new StringBuilder();
|
||||||
|
summaryHeader.append("Target");
|
||||||
|
summaryHeader.append("\ttotal_coverage");
|
||||||
|
summaryHeader.append("\taverage_coverage");
|
||||||
|
|
||||||
|
for ( String s : firstStats.getAllSamples() ) {
|
||||||
|
summaryHeader.append("\t");
|
||||||
|
summaryHeader.append(s);
|
||||||
|
summaryHeader.append("_mean_cvg");
|
||||||
|
summaryHeader.append("\t");
|
||||||
|
summaryHeader.append(s);
|
||||||
|
summaryHeader.append("_granular_Q1");
|
||||||
|
summaryHeader.append("\t");
|
||||||
|
summaryHeader.append(s);
|
||||||
|
summaryHeader.append("_granular_median");
|
||||||
|
summaryHeader.append("\t");
|
||||||
|
summaryHeader.append(s);
|
||||||
|
summaryHeader.append("_granular_Q3");
|
||||||
|
}
|
||||||
|
|
||||||
|
summaryOut.printf("%s%n",summaryHeader);
|
||||||
|
|
||||||
|
int[][] nTargetsByAvgCvgBySample = new int[firstStats.getHistograms().size()][firstStats.getEndpoints().length+1];
|
||||||
|
for ( int i = 0; i < nTargetsByAvgCvgBySample.length; i ++ ) {
|
||||||
|
for ( int b = 0; b < nTargetsByAvgCvgBySample[0].length; b++) {
|
||||||
|
nTargetsByAvgCvgBySample[i][b] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printTargetSummary(summaryOut,firstPair);
|
||||||
|
updateTargetTable(nTargetsByAvgCvgBySample,firstStats);
|
||||||
|
|
||||||
|
for ( Pair<GenomeLoc,DepthOfCoverageStats> targetStats : statsByInterval ) {
|
||||||
|
printTargetSummary(summaryOut,targetStats);
|
||||||
|
updateTargetTable(nTargetsByAvgCvgBySample,targetStats.second);
|
||||||
|
firstStats = this.treeReduce(firstStats,targetStats.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
printIntervalTable(statsOut,nTargetsByAvgCvgBySample,firstStats.getEndpoints());
|
||||||
|
|
||||||
|
summaryOut.close();
|
||||||
|
statsOut.close();
|
||||||
|
|
||||||
|
return firstStats;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printTargetSummary(PrintStream output, Pair<GenomeLoc,DepthOfCoverageStats> intervalStats) {
|
||||||
|
DepthOfCoverageStats stats = intervalStats.second;
|
||||||
|
int[] bins = stats.getEndpoints();
|
||||||
|
StringBuilder targetSummary = new StringBuilder();
|
||||||
|
targetSummary.append(intervalStats.first.toString());
|
||||||
|
targetSummary.append("\t");
|
||||||
|
targetSummary.append(stats.getTotalLoci()*stats.getMeans().get(DepthOfCoverageStats.ALL_SAMPLES));
|
||||||
|
// TODO: change this to use the raw counts directly rather than re-estimating from mean*nloci
|
||||||
|
targetSummary.append("\t");
|
||||||
|
targetSummary.append(stats.getMeans().get(DepthOfCoverageStats.ALL_SAMPLES));
|
||||||
|
|
||||||
|
for ( String s : stats.getAllSamples() ) {
|
||||||
|
targetSummary.append("\t");
|
||||||
|
targetSummary.append(String.format("%.2f", stats.getMeans().get(s)));
|
||||||
|
targetSummary.append("\t");
|
||||||
|
int median = getQuantile(stats.getHistograms().get(s),0.5);
|
||||||
|
int q1 = getQuantile(stats.getHistograms().get(s),0.25);
|
||||||
|
int q3 = getQuantile(stats.getHistograms().get(s),0.75);
|
||||||
|
targetSummary.append(bins[q1]);
|
||||||
|
targetSummary.append("\t");
|
||||||
|
targetSummary.append(bins[median]);
|
||||||
|
targetSummary.append("\t");
|
||||||
|
targetSummary.append(bins[q3]);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
output.printf("%s%n", targetSummary);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printIntervalTable(PrintStream output, int[][] intervalTable, int[] cutoffs) {
|
||||||
|
output.printf("\tdepth>=%d",0);
|
||||||
|
for ( int col = 0; col < intervalTable[0].length-1; col ++ ) {
|
||||||
|
output.printf("\tdepth>=%d",cutoffs[col]);
|
||||||
|
}
|
||||||
|
|
||||||
|
output.printf(String.format("%n"));
|
||||||
|
for ( int row = 0; row < intervalTable.length; row ++ ) {
|
||||||
|
output.printf("At_least_%d_samples",row+1);
|
||||||
|
for ( int col = 0; col < intervalTable[0].length; col++ ) {
|
||||||
|
output.printf("\t%d",intervalTable[row][col]);
|
||||||
|
}
|
||||||
|
output.printf(String.format("%n"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @updateTargetTable
|
||||||
|
* The idea is to have counts for how many *targets* have at least K samples with
|
||||||
|
* median coverage of at least X.
|
||||||
|
* To that end:
|
||||||
|
* Iterate over the samples the DOCS object, determine how many there are with
|
||||||
|
* median coverage > leftEnds[0]; how many with median coverage > leftEnds[1]
|
||||||
|
* and so on. Then this target has at least N, N-1, N-2, ... 1, 0 samples covered
|
||||||
|
* to leftEnds[0] and at least M,M-1,M-2,...1,0 samples covered to leftEnds[1]
|
||||||
|
* and so on.
|
||||||
|
*/
|
||||||
|
private void updateTargetTable(int[][] table, DepthOfCoverageStats stats) {
|
||||||
|
int[] cutoffs = stats.getEndpoints();
|
||||||
|
int[] countsOfMediansAboveCutoffs = new int[cutoffs.length+1]; // 0 bin to catch everything
|
||||||
|
for ( int i = 0; i < countsOfMediansAboveCutoffs.length; i ++) {
|
||||||
|
countsOfMediansAboveCutoffs[i]=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( String s : stats.getAllSamples() ) {
|
||||||
|
int medianBin = getQuantile(stats.getHistograms().get(s),0.5);
|
||||||
|
for ( int i = 0; i <= medianBin; i ++) {
|
||||||
|
countsOfMediansAboveCutoffs[i]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int medianBin = 0; medianBin < countsOfMediansAboveCutoffs.length; medianBin++) {
|
||||||
|
for ( ; countsOfMediansAboveCutoffs[medianBin] > 0; countsOfMediansAboveCutoffs[medianBin]-- ) {
|
||||||
|
table[countsOfMediansAboveCutoffs[medianBin]-1][medianBin]++;
|
||||||
|
// the -1 is due to counts being 1-based and offsets being 0-based
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// FINAL ON TRAVERSAL DONE
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
public void onTraversalDone(DepthOfCoverageStats coverageProfiles) {
|
public void onTraversalDone(DepthOfCoverageStats coverageProfiles) {
|
||||||
printSummary(out,summaryStatisticsFile,coverageProfiles);
|
///////////////////
|
||||||
printPerSample(out,perSampleStatisticsFile,coverageProfiles);
|
// OPTIONAL OUTPUTS
|
||||||
printPerLocus(perLocusStatisticsFile,coverageProfiles);
|
//////////////////
|
||||||
|
|
||||||
|
if ( ! omitSampleSummary ) {
|
||||||
|
File summaryStatisticsFile = deriveFromStream("summary_statistics");
|
||||||
|
File perSampleStatisticsFile = deriveFromStream("sample_statistics");
|
||||||
|
printSummary(out,summaryStatisticsFile,coverageProfiles);
|
||||||
|
printPerSample(out,perSampleStatisticsFile,coverageProfiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! omitLocusTable ) {
|
||||||
|
File perLocusStatisticsFile = deriveFromStream("locus_statistics");
|
||||||
|
printPerLocus(perLocusStatisticsFile,coverageProfiles);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public File deriveFromStream(String append) {
|
||||||
|
String name = getToolkit().getArguments().outFileName;
|
||||||
|
if ( name.contains("stdout") || name.contains("Stdout") || name.contains("STDOUT")) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return new File(name+"."+append);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
@ -194,9 +422,14 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
int[] leftEnds = stats.getEndpoints();
|
int[] leftEnds = stats.getEndpoints();
|
||||||
|
|
||||||
for ( String s : histograms.keySet() ) {
|
for ( String s : histograms.keySet() ) {
|
||||||
int median = getQuantile(histograms.get(s),0.5);
|
int[] histogram = histograms.get(s);
|
||||||
int q1 = getQuantile(histograms.get(s),0.25);
|
int median = getQuantile(histogram,0.5);
|
||||||
int q3 = getQuantile(histograms.get(s),0.75);
|
int q1 = getQuantile(histogram,0.25);
|
||||||
|
int q3 = getQuantile(histogram,0.75);
|
||||||
|
// if any of these are larger than the higest bin, put the median as in the largest bin
|
||||||
|
median = median == histogram.length-1 ? histogram.length-2 : median;
|
||||||
|
q1 = q1 == histogram.length-1 ? histogram.length-2 : q1;
|
||||||
|
q3 = q3 == histogram.length-1 ? histogram.length-2 : q3;
|
||||||
output.printf("%s\t%.2f\t%d\t%d\t%d%n",s,means.get(s),leftEnds[q3],leftEnds[median],leftEnds[q1]);
|
output.printf("%s\t%.2f\t%d\t%d\t%d%n",s,means.get(s),leftEnds[q3],leftEnds[median],leftEnds[q1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -217,22 +450,47 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
bin++;
|
bin++;
|
||||||
}
|
}
|
||||||
|
|
||||||
bin = bin == histogram.length-1 ? histogram.length-2 : bin;
|
|
||||||
|
|
||||||
return bin;
|
return bin;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void printDepths(PrintStream stream, Map<String,Integer> depthBySample, Set<String> allSamples) {
|
||||||
|
// get the depths per sample and build up the output string while tabulating total and average coverage
|
||||||
|
StringBuilder perSampleOutput = new StringBuilder();
|
||||||
|
int tDepth = 0;
|
||||||
|
for ( String s : allSamples ) {
|
||||||
|
perSampleOutput.append("\t");
|
||||||
|
int dp = depthBySample.keySet().contains(s) ? depthBySample.get(s) : 0;
|
||||||
|
perSampleOutput.append(dp);
|
||||||
|
tDepth += dp;
|
||||||
|
}
|
||||||
|
// remember -- genome locus was printed in map()
|
||||||
|
stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class DepthOfCoverageStats {
|
class DepthOfCoverageStats {
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// STATIC DATA
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
public static String ALL_SAMPLES = "ALL_COMBINED_SAMPLES";
|
public static String ALL_SAMPLES = "ALL_COMBINED_SAMPLES";
|
||||||
// STANDARD (constantly updated) DATA
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// STANDARD DATA
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
private Map<String,int[]> granularHistogramBySample; // holds the counts per each bin
|
private Map<String,int[]> granularHistogramBySample; // holds the counts per each bin
|
||||||
private Map<String,Double> meanCoverages; // holds mean coverage per sample
|
private Map<String,Double> meanCoverages; // holds mean coverage per sample
|
||||||
private int[] binLeftEndpoints; // describes the left endpoint for each bin
|
private int[] binLeftEndpoints; // describes the left endpoint for each bin
|
||||||
private int[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
|
private int[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
|
||||||
private boolean tabulateLocusCounts = false;
|
private boolean tabulateLocusCounts = false;
|
||||||
private int nLoci; // number of loci seen
|
private int nLoci; // number of loci seen
|
||||||
// TEMPORARY DATA (not worth re-instantiating every time)
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// TEMPORARY DATA ( not worth re-instantiating )
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
private int[] locusHistogram; // holds a histogram for each locus; reset after each update() call
|
private int[] locusHistogram; // holds a histogram for each locus; reset after each update() call
|
||||||
private int totalDepth; // holds the total depth of coverage for each locus; reset after each update() call
|
private int totalDepth; // holds the total depth of coverage for each locus; reset after each update() call
|
||||||
|
|
||||||
|
|
@ -442,4 +700,8 @@ class DepthOfCoverageStats {
|
||||||
return nLoci;
|
return nLoci;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Set<String> getAllSamples() {
|
||||||
|
return granularHistogramBySample.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,39 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Feb 25, 2010
|
||||||
|
*/
|
||||||
|
public class CoverageStatisticsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
private boolean RUN_TESTS = false;
|
||||||
|
private String root = "-T CoverageStatistics ";
|
||||||
|
|
||||||
|
private String buildRootCmd(String ref, String bam, String interval) {
|
||||||
|
return root + "-R "+ref+" -I "+bam+" -L "+interval+" -o %s";
|
||||||
|
}
|
||||||
|
|
||||||
|
private void execute(String name, WalkerTestSpec spec) {
|
||||||
|
if ( RUN_TESTS ) {
|
||||||
|
executeTest(name,spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBaseOutputNoFiltering() {
|
||||||
|
String bam_file = "/humgen/gsa-hphome1/chartl/projects/depthOfCoverage/testFiles/bams/Ciliopathies_1_88534_3_samples.bam";
|
||||||
|
String interval_list = "chr1:855534";
|
||||||
|
String reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
||||||
|
String cmd = buildRootCmd(reference,bam_file,interval_list) + " -mmq 0 -mbq 0 -omitSampleSummary -omitIntervals -omitLocus";
|
||||||
|
String expected = "2aee1dbcb69bf1e874d56cd23336afa8";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(cmd,1, Arrays.asList(expected));
|
||||||
|
execute("testBaseOutputNoFiltering",spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue