Quick commit in advance of Aaron's. Just a bunch of refactoring (private classes separated out, put in proper package). Also support added for coverage by read group rather than sample.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2897 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
622554d7bd
commit
87f8fb7282
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -31,7 +31,7 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
// todo [DONE] -- add per target (e.g. regional) aggregation
|
// todo [DONE] -- add per target (e.g. regional) aggregation
|
||||||
// todo [DONE] -- add ability to print out the calculated bins and quit (for pre-analysis bin size selection)
|
// todo [DONE] -- add ability to print out the calculated bins and quit (for pre-analysis bin size selection)
|
||||||
// todo -- refactor the location of the ALL_SAMPLE metrics [keep out of the per-sample HashMaps]
|
// todo [DONE] -- refactor the location of the ALL_SAMPLE metrics [keep out of the per-sample HashMaps]
|
||||||
// todo [DONE] -- per locus output through -o
|
// todo [DONE] -- per locus output through -o
|
||||||
// todo -- support for using read groups instead of samples
|
// todo -- support for using read groups instead of samples
|
||||||
// todo -- coverage without deletions
|
// todo -- coverage without deletions
|
||||||
|
|
@ -62,6 +62,8 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
boolean printBinEndpointsAndExit = false;
|
boolean printBinEndpointsAndExit = false;
|
||||||
@Argument(fullName = "omitPerSampleStats", shortName = "omitSampleSummary", doc = "Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime.", required = false)
|
@Argument(fullName = "omitPerSampleStats", shortName = "omitSampleSummary", doc = "Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime.", required = false)
|
||||||
boolean omitSampleSummary = false;
|
boolean omitSampleSummary = false;
|
||||||
|
@Argument(fullName = "useReadGroups", shortName = "rg", doc = "Split depth of coverage output by read group rather than by sample", required = false)
|
||||||
|
boolean useReadGroup = false;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
// STANDARD WALKER METHODS
|
// STANDARD WALKER METHODS
|
||||||
|
|
@ -86,13 +88,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
if ( ! omitDepthOutput ) { // print header
|
if ( ! omitDepthOutput ) { // print header
|
||||||
out.printf("%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
out.printf("%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
||||||
// get all the samples
|
// get all the samples
|
||||||
HashSet<String> allSamples = new HashSet<String>(); // since the DOCS object uses a HashMap, this will be in the same order
|
HashSet<String> allSamples = getSamplesFromToolKit(useReadGroup);
|
||||||
|
|
||||||
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders()) {
|
|
||||||
for (String s : sampleSet) {
|
|
||||||
allSamples.add(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( String s : allSamples) {
|
for ( String s : allSamples) {
|
||||||
out.printf("\t%s_%s","Depth_for",s);
|
out.printf("\t%s_%s","Depth_for",s);
|
||||||
|
|
@ -104,21 +100,39 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
out.printf("Per-Locus Depth of Coverage output was omitted");
|
out.printf("Per-Locus Depth of Coverage output was omitted");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private HashSet<String> getSamplesFromToolKit( boolean getReadGroupsInstead ) {
|
||||||
|
HashSet<String> partitions = new HashSet<String>(); // since the DOCS object uses a HashMap, this will be in the same order
|
||||||
|
|
||||||
|
if ( getReadGroupsInstead ) {
|
||||||
|
for ( Set<String> rgSet : getToolkit().getMergedReadGroupsByReaders() ) {
|
||||||
|
for ( String rg : rgSet ) {
|
||||||
|
partitions.add(rg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders()) {
|
||||||
|
for (String s : sampleSet) {
|
||||||
|
partitions.add(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return partitions;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isReduceByInterval() {
|
public boolean isReduceByInterval() {
|
||||||
return ( ! omitIntervals );
|
return ( ! omitIntervals );
|
||||||
}
|
}
|
||||||
|
|
||||||
public DepthOfCoverageStats reduceInit() {
|
public DepthOfCoverageStats reduceInit() {
|
||||||
List<Set<String>> samplesByReaders = getToolkit().getSamplesByReaders();
|
|
||||||
DepthOfCoverageStats stats = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins));
|
DepthOfCoverageStats stats = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins));
|
||||||
|
|
||||||
for ( Set<String> sampleSet : samplesByReaders ) {
|
for ( String sample : getSamplesFromToolKit(useReadGroup) ) {
|
||||||
for ( String sample : sampleSet ) {
|
stats.addSample(sample);
|
||||||
stats.addSample(sample);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if ( ! omitLocusTable ) {
|
if ( ! omitLocusTable ) {
|
||||||
stats.initializeLocusCounts();
|
stats.initializeLocusCounts();
|
||||||
}
|
}
|
||||||
|
|
@ -127,7 +141,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Map<String,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
Map<String,StratifiedAlignmentContext> contextsBySample =
|
Map<String, StratifiedAlignmentContext> contextsBySample =
|
||||||
StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
|
StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
|
||||||
HashMap<String,Integer> depthBySample = new HashMap<String,Integer>();
|
HashMap<String,Integer> depthBySample = new HashMap<String,Integer>();
|
||||||
|
|
||||||
|
|
@ -479,241 +493,4 @@ public class CoverageStatistics extends LocusWalker<Map<String,Integer>, DepthOf
|
||||||
stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class DepthOfCoverageStats {
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// STATIC DATA
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
/* none so far */
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// STANDARD DATA
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
private Map<String,int[]> granularHistogramBySample; // holds the counts per each bin
|
|
||||||
private Map<String,Long> totalCoverages; // holds total coverage per sample
|
|
||||||
private int[] binLeftEndpoints; // describes the left endpoint for each bin
|
|
||||||
private int[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
|
|
||||||
private boolean tabulateLocusCounts = false;
|
|
||||||
private long nLoci; // number of loci seen
|
|
||||||
private long totalDepthOfCoverage;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// TEMPORARY DATA ( not worth re-instantiating )
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
private int[] locusHistogram; // holds a histogram for each locus; reset after each update() call
|
|
||||||
private int totalLocusDepth; // holds the total depth of coverage for each locus; reset after each update() call
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// STATIC METHODS
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
public static int[] calculateBinEndpoints(int lower, int upper, int bins) {
|
|
||||||
if ( bins > upper - lower || lower < 1 ) {
|
|
||||||
throw new IllegalArgumentException("Illegal argument to calculateBinEndpoints; "+
|
|
||||||
"lower bound must be at least 1, and number of bins may not exceed stop - start");
|
|
||||||
}
|
|
||||||
|
|
||||||
int[] binLeftEndpoints = new int[bins+1];
|
|
||||||
binLeftEndpoints[0] = lower;
|
|
||||||
|
|
||||||
int length = upper - lower;
|
|
||||||
double scale = Math.log10((double) length)/bins;
|
|
||||||
|
|
||||||
for ( int b = 1; b < bins ; b++ ) {
|
|
||||||
int leftEnd = lower + (int) Math.floor(Math.pow(10.0,(b-1.0)*scale));
|
|
||||||
// todo -- simplify to length^(scale/bins); make non-constant to put bin ends in more "useful"
|
|
||||||
// todo -- positions on the number line
|
|
||||||
while ( leftEnd <= binLeftEndpoints[b-1] ) {
|
|
||||||
leftEnd++;
|
|
||||||
}
|
|
||||||
|
|
||||||
binLeftEndpoints[b] = leftEnd;
|
|
||||||
}
|
|
||||||
|
|
||||||
binLeftEndpoints[binLeftEndpoints.length-1] = upper;
|
|
||||||
|
|
||||||
return binLeftEndpoints;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// INITIALIZATION METHODS
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
public DepthOfCoverageStats(int[] leftEndpoints) {
|
|
||||||
this.binLeftEndpoints = leftEndpoints;
|
|
||||||
granularHistogramBySample = new HashMap<String,int[]>();
|
|
||||||
totalCoverages = new HashMap<String,Long>();
|
|
||||||
nLoci = 0;
|
|
||||||
totalLocusDepth = 0;
|
|
||||||
totalDepthOfCoverage = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addSample(String sample) {
|
|
||||||
if ( granularHistogramBySample.containsKey(sample) ) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int[] binCounts = new int[this.binLeftEndpoints.length+1];
|
|
||||||
for ( int b = 0; b < binCounts.length; b ++ ) {
|
|
||||||
binCounts[b] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
granularHistogramBySample.put(sample,binCounts);
|
|
||||||
totalCoverages.put(sample,0l);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initializeLocusCounts() {
|
|
||||||
locusCoverageCounts = new int[granularHistogramBySample.size()][binLeftEndpoints.length+1];
|
|
||||||
locusHistogram = new int[binLeftEndpoints.length+1];
|
|
||||||
for ( int b = 0; b < binLeftEndpoints.length+1; b ++ ) {
|
|
||||||
for ( int a = 0; a < granularHistogramBySample.size(); a ++ ) {
|
|
||||||
locusCoverageCounts[a][b] = 0;
|
|
||||||
}
|
|
||||||
locusHistogram[b] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
tabulateLocusCounts = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// UPDATE METHODS
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
public void update(Map<String,Integer> depthBySample) {
|
|
||||||
int b;
|
|
||||||
for ( String sample : granularHistogramBySample.keySet() ) {
|
|
||||||
if ( depthBySample.containsKey(sample) ) {
|
|
||||||
b = updateSample(sample,depthBySample.get(sample));
|
|
||||||
totalLocusDepth += depthBySample.get(sample);
|
|
||||||
} else {
|
|
||||||
b = updateSample(sample,0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( tabulateLocusCounts ) {
|
|
||||||
for ( int i = 0; i <= b; i ++ ) {
|
|
||||||
locusHistogram[i]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
updateLocusCounts(locusHistogram);
|
|
||||||
|
|
||||||
nLoci++;
|
|
||||||
totalDepthOfCoverage += totalLocusDepth;
|
|
||||||
totalLocusDepth = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int updateSample(String sample, int depth) {
|
|
||||||
totalCoverages.put(sample,totalCoverages.get(sample)+depth);
|
|
||||||
|
|
||||||
int[] granularBins = granularHistogramBySample.get(sample);
|
|
||||||
for ( int b = 0; b < binLeftEndpoints.length; b ++ ) {
|
|
||||||
if ( depth < binLeftEndpoints[b] ) {
|
|
||||||
granularBins[b]++;
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
granularBins[binLeftEndpoints.length]++; // greater than all left-endpoints
|
|
||||||
return binLeftEndpoints.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void merge(DepthOfCoverageStats newStats) {
|
|
||||||
this.mergeSamples(newStats);
|
|
||||||
if ( this.tabulateLocusCounts && newStats.tabulateLocusCounts ) {
|
|
||||||
this.mergeLocusCounts(newStats.getLocusCounts());
|
|
||||||
}
|
|
||||||
nLoci += newStats.getTotalLoci();
|
|
||||||
totalDepthOfCoverage += newStats.getTotalCoverage();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mergeSamples(DepthOfCoverageStats otherStats) {
|
|
||||||
Map<String,int[]> otherHistogram = otherStats.getHistograms();
|
|
||||||
Map<String,Double> otherMeans = otherStats.getMeans();
|
|
||||||
for ( String s : this.getAllSamples() ) {
|
|
||||||
int[] internalCounts = granularHistogramBySample.get(s);
|
|
||||||
int[] externalCounts = otherHistogram.get(s);
|
|
||||||
for ( int b = 0; b < internalCounts.length; b++ ) {
|
|
||||||
internalCounts[b] += externalCounts[b];
|
|
||||||
}
|
|
||||||
|
|
||||||
this.totalCoverages.put(s, this.totalCoverages.get(s) + otherStats.totalCoverages.get(s));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mergeLocusCounts( int[][] otherCounts ) {
|
|
||||||
for ( int a = 0; a < locusCoverageCounts.length; a ++ ) {
|
|
||||||
for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) {
|
|
||||||
locusCoverageCounts[a][b] += otherCounts[a][b];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Update locus counts -- takes an array in which the number of samples
|
|
||||||
* with depth ABOVE [i] is held. So if the bin left endpoints were 2, 5, 10
|
|
||||||
* then we'd have an array that represented:
|
|
||||||
* [# samples with depth 0 - inf], [# samples with depth 2 - inf],
|
|
||||||
* [# samples with depth 5 - inf], [# samples with depth 10-inf];
|
|
||||||
*
|
|
||||||
* this is
|
|
||||||
* @argument cumulativeSamplesByDepthBin - see above
|
|
||||||
*/
|
|
||||||
private void updateLocusCounts(int[] cumulativeSamplesByDepthBin) {
|
|
||||||
if ( tabulateLocusCounts ) {
|
|
||||||
for ( int bin = 0; bin < cumulativeSamplesByDepthBin.length; bin ++ ) {
|
|
||||||
int numSamples = cumulativeSamplesByDepthBin[bin];
|
|
||||||
for ( int i = 0; i < numSamples; i ++ ) {
|
|
||||||
locusCoverageCounts[i][bin]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
cumulativeSamplesByDepthBin[bin] = 0; // reset counts in advance of next update()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// ACCESSOR METHODS
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
public Map<String,int[]> getHistograms() {
|
|
||||||
return granularHistogramBySample;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[][] getLocusCounts() {
|
|
||||||
return locusCoverageCounts;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[] getEndpoints() {
|
|
||||||
return binLeftEndpoints;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String,Double> getMeans() {
|
|
||||||
HashMap<String,Double> means = new HashMap<String,Double>();
|
|
||||||
for ( String s : getAllSamples() ) {
|
|
||||||
means.put(s,( (double)totalCoverages.get(s))/( (double) nLoci ));
|
|
||||||
}
|
|
||||||
|
|
||||||
return means;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getTotalLoci() {
|
|
||||||
return nLoci;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Set<String> getAllSamples() {
|
|
||||||
return granularHistogramBySample.keySet();
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getTotalMeanCoverage() {
|
|
||||||
return ( (double) totalDepthOfCoverage )/ ( (double) nLoci );
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getTotalCoverage() {
|
|
||||||
return totalDepthOfCoverage;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,248 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Feb 26, 2010
|
||||||
|
*/
|
||||||
|
public class DepthOfCoverageStats {
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// STATIC DATA
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/* none so far */
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// STANDARD DATA
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
private Map<String,int[]> granularHistogramBySample; // holds the counts per each bin
|
||||||
|
private Map<String,Long> totalCoverages; // holds total coverage per sample
|
||||||
|
private int[] binLeftEndpoints; // describes the left endpoint for each bin
|
||||||
|
private int[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
|
||||||
|
private boolean tabulateLocusCounts = false;
|
||||||
|
private long nLoci; // number of loci seen
|
||||||
|
private long totalDepthOfCoverage;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// TEMPORARY DATA ( not worth re-instantiating )
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
private int[] locusHistogram; // holds a histogram for each locus; reset after each update() call
|
||||||
|
private int totalLocusDepth; // holds the total depth of coverage for each locus; reset after each update() call
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// STATIC METHODS
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
public static int[] calculateBinEndpoints(int lower, int upper, int bins) {
|
||||||
|
if ( bins > upper - lower || lower < 1 ) {
|
||||||
|
throw new IllegalArgumentException("Illegal argument to calculateBinEndpoints; "+
|
||||||
|
"lower bound must be at least 1, and number of bins may not exceed stop - start");
|
||||||
|
}
|
||||||
|
|
||||||
|
int[] binLeftEndpoints = new int[bins+1];
|
||||||
|
binLeftEndpoints[0] = lower;
|
||||||
|
|
||||||
|
int length = upper - lower;
|
||||||
|
double scale = Math.log10((double) length)/bins;
|
||||||
|
|
||||||
|
for ( int b = 1; b < bins ; b++ ) {
|
||||||
|
int leftEnd = lower + (int) Math.floor(Math.pow(10.0,(b-1.0)*scale));
|
||||||
|
// todo -- simplify to length^(scale/bins); make non-constant to put bin ends in more "useful"
|
||||||
|
// todo -- positions on the number line
|
||||||
|
while ( leftEnd <= binLeftEndpoints[b-1] ) {
|
||||||
|
leftEnd++;
|
||||||
|
}
|
||||||
|
|
||||||
|
binLeftEndpoints[b] = leftEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
binLeftEndpoints[binLeftEndpoints.length-1] = upper;
|
||||||
|
|
||||||
|
return binLeftEndpoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// INITIALIZATION METHODS
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
public DepthOfCoverageStats(int[] leftEndpoints) {
|
||||||
|
this.binLeftEndpoints = leftEndpoints;
|
||||||
|
granularHistogramBySample = new HashMap<String,int[]>();
|
||||||
|
totalCoverages = new HashMap<String,Long>();
|
||||||
|
nLoci = 0;
|
||||||
|
totalLocusDepth = 0;
|
||||||
|
totalDepthOfCoverage = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addSample(String sample) {
|
||||||
|
if ( granularHistogramBySample.containsKey(sample) ) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int[] binCounts = new int[this.binLeftEndpoints.length+1];
|
||||||
|
for ( int b = 0; b < binCounts.length; b ++ ) {
|
||||||
|
binCounts[b] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
granularHistogramBySample.put(sample,binCounts);
|
||||||
|
totalCoverages.put(sample,0l);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initializeLocusCounts() {
|
||||||
|
locusCoverageCounts = new int[granularHistogramBySample.size()][binLeftEndpoints.length+1];
|
||||||
|
locusHistogram = new int[binLeftEndpoints.length+1];
|
||||||
|
for ( int b = 0; b < binLeftEndpoints.length+1; b ++ ) {
|
||||||
|
for ( int a = 0; a < granularHistogramBySample.size(); a ++ ) {
|
||||||
|
locusCoverageCounts[a][b] = 0;
|
||||||
|
}
|
||||||
|
locusHistogram[b] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
tabulateLocusCounts = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// UPDATE METHODS
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
public void update(Map<String,Integer> depthBySample) {
|
||||||
|
int b;
|
||||||
|
for ( String sample : granularHistogramBySample.keySet() ) {
|
||||||
|
if ( depthBySample.containsKey(sample) ) {
|
||||||
|
b = updateSample(sample,depthBySample.get(sample));
|
||||||
|
totalLocusDepth += depthBySample.get(sample);
|
||||||
|
} else {
|
||||||
|
b = updateSample(sample,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( tabulateLocusCounts ) {
|
||||||
|
for ( int i = 0; i <= b; i ++ ) {
|
||||||
|
locusHistogram[i]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
updateLocusCounts(locusHistogram);
|
||||||
|
|
||||||
|
nLoci++;
|
||||||
|
totalDepthOfCoverage += totalLocusDepth;
|
||||||
|
totalLocusDepth = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int updateSample(String sample, int depth) {
|
||||||
|
totalCoverages.put(sample,totalCoverages.get(sample)+depth);
|
||||||
|
|
||||||
|
int[] granularBins = granularHistogramBySample.get(sample);
|
||||||
|
for ( int b = 0; b < binLeftEndpoints.length; b ++ ) {
|
||||||
|
if ( depth < binLeftEndpoints[b] ) {
|
||||||
|
granularBins[b]++;
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
granularBins[binLeftEndpoints.length]++; // greater than all left-endpoints
|
||||||
|
return binLeftEndpoints.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void merge(DepthOfCoverageStats newStats) {
|
||||||
|
this.mergeSamples(newStats);
|
||||||
|
if ( this.tabulateLocusCounts && newStats.tabulateLocusCounts ) {
|
||||||
|
this.mergeLocusCounts(newStats.getLocusCounts());
|
||||||
|
}
|
||||||
|
nLoci += newStats.getTotalLoci();
|
||||||
|
totalDepthOfCoverage += newStats.getTotalCoverage();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mergeSamples(DepthOfCoverageStats otherStats) {
|
||||||
|
Map<String,int[]> otherHistogram = otherStats.getHistograms();
|
||||||
|
Map<String,Double> otherMeans = otherStats.getMeans();
|
||||||
|
for ( String s : this.getAllSamples() ) {
|
||||||
|
int[] internalCounts = granularHistogramBySample.get(s);
|
||||||
|
int[] externalCounts = otherHistogram.get(s);
|
||||||
|
for ( int b = 0; b < internalCounts.length; b++ ) {
|
||||||
|
internalCounts[b] += externalCounts[b];
|
||||||
|
}
|
||||||
|
|
||||||
|
this.totalCoverages.put(s, this.totalCoverages.get(s) + otherStats.totalCoverages.get(s));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mergeLocusCounts( int[][] otherCounts ) {
|
||||||
|
for ( int a = 0; a < locusCoverageCounts.length; a ++ ) {
|
||||||
|
for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) {
|
||||||
|
locusCoverageCounts[a][b] += otherCounts[a][b];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update locus counts -- takes an array in which the number of samples
|
||||||
|
* with depth ABOVE [i] is held. So if the bin left endpoints were 2, 5, 10
|
||||||
|
* then we'd have an array that represented:
|
||||||
|
* [# samples with depth 0 - inf], [# samples with depth 2 - inf],
|
||||||
|
* [# samples with depth 5 - inf], [# samples with depth 10-inf];
|
||||||
|
*
|
||||||
|
* this is
|
||||||
|
* @argument cumulativeSamplesByDepthBin - see above
|
||||||
|
*/
|
||||||
|
private void updateLocusCounts(int[] cumulativeSamplesByDepthBin) {
|
||||||
|
if ( tabulateLocusCounts ) {
|
||||||
|
for ( int bin = 0; bin < cumulativeSamplesByDepthBin.length; bin ++ ) {
|
||||||
|
int numSamples = cumulativeSamplesByDepthBin[bin];
|
||||||
|
for ( int i = 0; i < numSamples; i ++ ) {
|
||||||
|
locusCoverageCounts[i][bin]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
cumulativeSamplesByDepthBin[bin] = 0; // reset counts in advance of next update()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// ACCESSOR METHODS
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
public Map<String,int[]> getHistograms() {
|
||||||
|
return granularHistogramBySample;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int[][] getLocusCounts() {
|
||||||
|
return locusCoverageCounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int[] getEndpoints() {
|
||||||
|
return binLeftEndpoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String,Double> getMeans() {
|
||||||
|
HashMap<String,Double> means = new HashMap<String,Double>();
|
||||||
|
for ( String s : getAllSamples() ) {
|
||||||
|
means.put(s,( (double)totalCoverages.get(s))/( (double) nLoci ));
|
||||||
|
}
|
||||||
|
|
||||||
|
return means;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTotalLoci() {
|
||||||
|
return nLoci;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<String> getAllSamples() {
|
||||||
|
return granularHistogramBySample.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getTotalMeanCoverage() {
|
||||||
|
return ( (double) totalDepthOfCoverage )/ ( (double) nLoci );
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTotalCoverage() {
|
||||||
|
return totalDepthOfCoverage;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||||
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
Loading…
Reference in New Issue