Depth of coverage now uses longs rather than ints. We can now successfully run on the Lepidosiren paradoxa genome (about 80 GB).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3859 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-07-23 14:14:12 +00:00
parent 536399eaa0
commit 973934f769
2 changed files with 25 additions and 25 deletions

View File

@ -23,10 +23,10 @@ public class DepthOfCoverageStats {
// STANDARD DATA
////////////////////////////////////////////////////////////////////////////////////
private Map<String,int[]> granularHistogramBySample; // holds the counts per each bin
private Map<String,long[]> granularHistogramBySample; // holds the counts per each bin
private Map<String,Long> totalCoverages; // holds total coverage per sample
private int[] binLeftEndpoints; // describes the left endpoint for each bin
private int[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
private long[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
private boolean tabulateLocusCounts = false;
private long nLoci; // number of loci seen
private long totalDepthOfCoverage;
@ -77,7 +77,7 @@ public class DepthOfCoverageStats {
public DepthOfCoverageStats(int[] leftEndpoints) {
this.binLeftEndpoints = leftEndpoints;
granularHistogramBySample = new HashMap<String,int[]>();
granularHistogramBySample = new HashMap<String,long[]>();
totalCoverages = new HashMap<String,Long>();
nLoci = 0;
totalLocusDepth = 0;
@ -86,10 +86,10 @@ public class DepthOfCoverageStats {
public DepthOfCoverageStats(DepthOfCoverageStats cloneMe) {
this.binLeftEndpoints = cloneMe.binLeftEndpoints;
granularHistogramBySample = new HashMap<String,int[]>();
granularHistogramBySample = new HashMap<String,long[]>();
totalCoverages = new HashMap<String,Long>();
for ( String s : cloneMe.getAllSamples() ) {
granularHistogramBySample.put(s,new int[cloneMe.getHistograms().get(s).length]);
granularHistogramBySample.put(s,new long[cloneMe.getHistograms().get(s).length]);
for ( int i = 0; i < granularHistogramBySample.get(s).length; i++ ) {
granularHistogramBySample.get(s)[i] = cloneMe.getHistograms().get(s)[i];
}
@ -98,7 +98,7 @@ public class DepthOfCoverageStats {
this.includeDeletions = cloneMe.includeDeletions;
if ( cloneMe.tabulateLocusCounts ) {
this.locusCoverageCounts = new int[cloneMe.locusCoverageCounts.length][cloneMe.locusCoverageCounts[0].length];
this.locusCoverageCounts = new long[cloneMe.locusCoverageCounts.length][cloneMe.locusCoverageCounts[0].length];
}
//this.granularHistogramBySample = cloneMe.granularHistogramBySample;
//this.totalCoverages = cloneMe.totalCoverages;
@ -112,7 +112,7 @@ public class DepthOfCoverageStats {
return;
}
int[] binCounts = new int[this.binLeftEndpoints.length+1];
long[] binCounts = new long[this.binLeftEndpoints.length+1];
for ( int b = 0; b < binCounts.length; b ++ ) {
binCounts[b] = 0;
}
@ -122,7 +122,7 @@ public class DepthOfCoverageStats {
}
public void initializeLocusCounts() {
locusCoverageCounts = new int[granularHistogramBySample.size()][binLeftEndpoints.length+1];
locusCoverageCounts = new long[granularHistogramBySample.size()][binLeftEndpoints.length+1];
locusHistogram = new int[binLeftEndpoints.length+1];
for ( int b = 0; b < binLeftEndpoints.length+1; b ++ ) {
for ( int a = 0; a < granularHistogramBySample.size(); a ++ ) {
@ -190,7 +190,7 @@ public class DepthOfCoverageStats {
private int updateSample(String sample, int depth) {
totalCoverages.put(sample,totalCoverages.get(sample)+depth);
int[] granularBins = granularHistogramBySample.get(sample);
long[] granularBins = granularHistogramBySample.get(sample);
for ( int b = 0; b < binLeftEndpoints.length; b ++ ) {
if ( depth < binLeftEndpoints[b] ) {
granularBins[b]++;
@ -212,11 +212,11 @@ public class DepthOfCoverageStats {
}
private void mergeSamples(DepthOfCoverageStats otherStats) {
Map<String,int[]> otherHistogram = otherStats.getHistograms();
Map<String,long[]> otherHistogram = otherStats.getHistograms();
Map<String,Double> otherMeans = otherStats.getMeans();
for ( String s : this.getAllSamples() ) {
int[] internalCounts = granularHistogramBySample.get(s);
int[] externalCounts = otherHistogram.get(s);
long[] internalCounts = granularHistogramBySample.get(s);
long[] externalCounts = otherHistogram.get(s);
for ( int b = 0; b < internalCounts.length; b++ ) {
internalCounts[b] += externalCounts[b];
}
@ -225,7 +225,7 @@ public class DepthOfCoverageStats {
}
}
private void mergeLocusCounts( int[][] otherCounts ) {
private void mergeLocusCounts( long[][] otherCounts ) {
for ( int a = 0; a < locusCoverageCounts.length; a ++ ) {
for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) {
locusCoverageCounts[a][b] += otherCounts[a][b];
@ -260,11 +260,11 @@ public class DepthOfCoverageStats {
// ACCESSOR METHODS
////////////////////////////////////////////////////////////////////////////////////
public Map<String,int[]> getHistograms() {
public Map<String,long[]> getHistograms() {
return granularHistogramBySample;
}
public int[][] getLocusCounts() {
public long[][] getLocusCounts() {
return locusCoverageCounts;
}
@ -302,7 +302,7 @@ public class DepthOfCoverageStats {
}
public double[] getCoverageProportions(String sample) {
int[] hist = granularHistogramBySample.get(sample);
long[] hist = granularHistogramBySample.get(sample);
double[] distribution = new double[hist.length];
long count = 0;
for ( int i = hist.length-1; i >= 0; i -- ) {

View File

@ -602,12 +602,12 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
hBuilder.append(String.format("from_%d_to_%d)%s",leftEnds[i-1],leftEnds[i],separator));
hBuilder.append(String.format("from_%d_to_inf%n",leftEnds[leftEnds.length-1]));
output.print(hBuilder.toString());
Map<String,int[]> histograms = stats.getHistograms();
Map<String,long[]> histograms = stats.getHistograms();
for ( Map.Entry<String, int[]> p : histograms.entrySet() ) {
for ( Map.Entry<String, long[]> p : histograms.entrySet() ) {
StringBuilder sBuilder = new StringBuilder();
sBuilder.append(String.format("sample_%s",p.getKey()));
for ( int count : p.getValue() ) {
for ( long count : p.getValue() ) {
sBuilder.append(String.format("%s%d",separator,count));
}
sBuilder.append(String.format("%n"));
@ -625,7 +625,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
int[] endpoints = stats.getEndpoints();
int samples = stats.getHistograms().size();
int[][] baseCoverageCumDist = stats.getLocusCounts();
long[][] baseCoverageCumDist = stats.getLocusCounts();
// rows - # of samples
// columns - depth of coverage
@ -693,14 +693,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
output.printf("%n");
Map<String,int[]> histograms = stats.getHistograms();
Map<String,long[]> histograms = stats.getHistograms();
Map<String,Double> means = stats.getMeans();
Map<String,Long> totals = stats.getTotals();
int[] leftEnds = stats.getEndpoints();
for ( Map.Entry<String, int[]> p : histograms.entrySet() ) {
for ( Map.Entry<String, long[]> p : histograms.entrySet() ) {
String s = p.getKey();
int[] histogram = p.getValue();
long[] histogram = p.getValue();
int median = getQuantile(histogram,0.5);
int q1 = getQuantile(histogram,0.25);
int q3 = getQuantile(histogram,0.75);
@ -728,7 +728,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
}
}
private int getQuantile(int[] histogram, double prop) {
private int getQuantile(long[] histogram, double prop) {
int total = 0;
for ( int i = 0; i < histogram.length; i ++ ) {
@ -745,7 +745,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
return bin == -1 ? 0 : bin;
}
private double getPctBasesAbove(int[] histogram, int bin) {
private double getPctBasesAbove(long[] histogram, int bin) {
long below = 0l;
long above = 0l;
for ( int index = 0; index < histogram.length; index++) {