Depth of coverage now uses longs rather than ints for its per-sample histogram and locus coverage counts. We can now successfully run on the Lepidosiren paradoxa genome (about 80 GB).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3859 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-07-23 14:14:12 +00:00
parent 536399eaa0
commit 973934f769
2 changed files with 25 additions and 25 deletions

View File

@ -23,10 +23,10 @@ public class DepthOfCoverageStats {
// STANDARD DATA // STANDARD DATA
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
private Map<String,int[]> granularHistogramBySample; // holds the counts per each bin private Map<String,long[]> granularHistogramBySample; // holds the counts per each bin
private Map<String,Long> totalCoverages; // holds total coverage per sample private Map<String,Long> totalCoverages; // holds total coverage per sample
private int[] binLeftEndpoints; // describes the left endpoint for each bin private int[] binLeftEndpoints; // describes the left endpoint for each bin
private int[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage private long[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
private boolean tabulateLocusCounts = false; private boolean tabulateLocusCounts = false;
private long nLoci; // number of loci seen private long nLoci; // number of loci seen
private long totalDepthOfCoverage; private long totalDepthOfCoverage;
@ -77,7 +77,7 @@ public class DepthOfCoverageStats {
public DepthOfCoverageStats(int[] leftEndpoints) { public DepthOfCoverageStats(int[] leftEndpoints) {
this.binLeftEndpoints = leftEndpoints; this.binLeftEndpoints = leftEndpoints;
granularHistogramBySample = new HashMap<String,int[]>(); granularHistogramBySample = new HashMap<String,long[]>();
totalCoverages = new HashMap<String,Long>(); totalCoverages = new HashMap<String,Long>();
nLoci = 0; nLoci = 0;
totalLocusDepth = 0; totalLocusDepth = 0;
@ -86,10 +86,10 @@ public class DepthOfCoverageStats {
public DepthOfCoverageStats(DepthOfCoverageStats cloneMe) { public DepthOfCoverageStats(DepthOfCoverageStats cloneMe) {
this.binLeftEndpoints = cloneMe.binLeftEndpoints; this.binLeftEndpoints = cloneMe.binLeftEndpoints;
granularHistogramBySample = new HashMap<String,int[]>(); granularHistogramBySample = new HashMap<String,long[]>();
totalCoverages = new HashMap<String,Long>(); totalCoverages = new HashMap<String,Long>();
for ( String s : cloneMe.getAllSamples() ) { for ( String s : cloneMe.getAllSamples() ) {
granularHistogramBySample.put(s,new int[cloneMe.getHistograms().get(s).length]); granularHistogramBySample.put(s,new long[cloneMe.getHistograms().get(s).length]);
for ( int i = 0; i < granularHistogramBySample.get(s).length; i++ ) { for ( int i = 0; i < granularHistogramBySample.get(s).length; i++ ) {
granularHistogramBySample.get(s)[i] = cloneMe.getHistograms().get(s)[i]; granularHistogramBySample.get(s)[i] = cloneMe.getHistograms().get(s)[i];
} }
@ -98,7 +98,7 @@ public class DepthOfCoverageStats {
this.includeDeletions = cloneMe.includeDeletions; this.includeDeletions = cloneMe.includeDeletions;
if ( cloneMe.tabulateLocusCounts ) { if ( cloneMe.tabulateLocusCounts ) {
this.locusCoverageCounts = new int[cloneMe.locusCoverageCounts.length][cloneMe.locusCoverageCounts[0].length]; this.locusCoverageCounts = new long[cloneMe.locusCoverageCounts.length][cloneMe.locusCoverageCounts[0].length];
} }
//this.granularHistogramBySample = cloneMe.granularHistogramBySample; //this.granularHistogramBySample = cloneMe.granularHistogramBySample;
//this.totalCoverages = cloneMe.totalCoverages; //this.totalCoverages = cloneMe.totalCoverages;
@ -112,7 +112,7 @@ public class DepthOfCoverageStats {
return; return;
} }
int[] binCounts = new int[this.binLeftEndpoints.length+1]; long[] binCounts = new long[this.binLeftEndpoints.length+1];
for ( int b = 0; b < binCounts.length; b ++ ) { for ( int b = 0; b < binCounts.length; b ++ ) {
binCounts[b] = 0; binCounts[b] = 0;
} }
@ -122,7 +122,7 @@ public class DepthOfCoverageStats {
} }
public void initializeLocusCounts() { public void initializeLocusCounts() {
locusCoverageCounts = new int[granularHistogramBySample.size()][binLeftEndpoints.length+1]; locusCoverageCounts = new long[granularHistogramBySample.size()][binLeftEndpoints.length+1];
locusHistogram = new int[binLeftEndpoints.length+1]; locusHistogram = new int[binLeftEndpoints.length+1];
for ( int b = 0; b < binLeftEndpoints.length+1; b ++ ) { for ( int b = 0; b < binLeftEndpoints.length+1; b ++ ) {
for ( int a = 0; a < granularHistogramBySample.size(); a ++ ) { for ( int a = 0; a < granularHistogramBySample.size(); a ++ ) {
@ -190,7 +190,7 @@ public class DepthOfCoverageStats {
private int updateSample(String sample, int depth) { private int updateSample(String sample, int depth) {
totalCoverages.put(sample,totalCoverages.get(sample)+depth); totalCoverages.put(sample,totalCoverages.get(sample)+depth);
int[] granularBins = granularHistogramBySample.get(sample); long[] granularBins = granularHistogramBySample.get(sample);
for ( int b = 0; b < binLeftEndpoints.length; b ++ ) { for ( int b = 0; b < binLeftEndpoints.length; b ++ ) {
if ( depth < binLeftEndpoints[b] ) { if ( depth < binLeftEndpoints[b] ) {
granularBins[b]++; granularBins[b]++;
@ -212,11 +212,11 @@ public class DepthOfCoverageStats {
} }
private void mergeSamples(DepthOfCoverageStats otherStats) { private void mergeSamples(DepthOfCoverageStats otherStats) {
Map<String,int[]> otherHistogram = otherStats.getHistograms(); Map<String,long[]> otherHistogram = otherStats.getHistograms();
Map<String,Double> otherMeans = otherStats.getMeans(); Map<String,Double> otherMeans = otherStats.getMeans();
for ( String s : this.getAllSamples() ) { for ( String s : this.getAllSamples() ) {
int[] internalCounts = granularHistogramBySample.get(s); long[] internalCounts = granularHistogramBySample.get(s);
int[] externalCounts = otherHistogram.get(s); long[] externalCounts = otherHistogram.get(s);
for ( int b = 0; b < internalCounts.length; b++ ) { for ( int b = 0; b < internalCounts.length; b++ ) {
internalCounts[b] += externalCounts[b]; internalCounts[b] += externalCounts[b];
} }
@ -225,7 +225,7 @@ public class DepthOfCoverageStats {
} }
} }
private void mergeLocusCounts( int[][] otherCounts ) { private void mergeLocusCounts( long[][] otherCounts ) {
for ( int a = 0; a < locusCoverageCounts.length; a ++ ) { for ( int a = 0; a < locusCoverageCounts.length; a ++ ) {
for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) { for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) {
locusCoverageCounts[a][b] += otherCounts[a][b]; locusCoverageCounts[a][b] += otherCounts[a][b];
@ -260,11 +260,11 @@ public class DepthOfCoverageStats {
// ACCESSOR METHODS // ACCESSOR METHODS
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
public Map<String,int[]> getHistograms() { public Map<String,long[]> getHistograms() {
return granularHistogramBySample; return granularHistogramBySample;
} }
public int[][] getLocusCounts() { public long[][] getLocusCounts() {
return locusCoverageCounts; return locusCoverageCounts;
} }
@ -302,7 +302,7 @@ public class DepthOfCoverageStats {
} }
public double[] getCoverageProportions(String sample) { public double[] getCoverageProportions(String sample) {
int[] hist = granularHistogramBySample.get(sample); long[] hist = granularHistogramBySample.get(sample);
double[] distribution = new double[hist.length]; double[] distribution = new double[hist.length];
long count = 0; long count = 0;
for ( int i = hist.length-1; i >= 0; i -- ) { for ( int i = hist.length-1; i >= 0; i -- ) {

View File

@ -602,12 +602,12 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
hBuilder.append(String.format("from_%d_to_%d)%s",leftEnds[i-1],leftEnds[i],separator)); hBuilder.append(String.format("from_%d_to_%d)%s",leftEnds[i-1],leftEnds[i],separator));
hBuilder.append(String.format("from_%d_to_inf%n",leftEnds[leftEnds.length-1])); hBuilder.append(String.format("from_%d_to_inf%n",leftEnds[leftEnds.length-1]));
output.print(hBuilder.toString()); output.print(hBuilder.toString());
Map<String,int[]> histograms = stats.getHistograms(); Map<String,long[]> histograms = stats.getHistograms();
for ( Map.Entry<String, int[]> p : histograms.entrySet() ) { for ( Map.Entry<String, long[]> p : histograms.entrySet() ) {
StringBuilder sBuilder = new StringBuilder(); StringBuilder sBuilder = new StringBuilder();
sBuilder.append(String.format("sample_%s",p.getKey())); sBuilder.append(String.format("sample_%s",p.getKey()));
for ( int count : p.getValue() ) { for ( long count : p.getValue() ) {
sBuilder.append(String.format("%s%d",separator,count)); sBuilder.append(String.format("%s%d",separator,count));
} }
sBuilder.append(String.format("%n")); sBuilder.append(String.format("%n"));
@ -625,7 +625,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
int[] endpoints = stats.getEndpoints(); int[] endpoints = stats.getEndpoints();
int samples = stats.getHistograms().size(); int samples = stats.getHistograms().size();
int[][] baseCoverageCumDist = stats.getLocusCounts(); long[][] baseCoverageCumDist = stats.getLocusCounts();
// rows - # of samples // rows - # of samples
// columns - depth of coverage // columns - depth of coverage
@ -693,14 +693,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
output.printf("%n"); output.printf("%n");
Map<String,int[]> histograms = stats.getHistograms(); Map<String,long[]> histograms = stats.getHistograms();
Map<String,Double> means = stats.getMeans(); Map<String,Double> means = stats.getMeans();
Map<String,Long> totals = stats.getTotals(); Map<String,Long> totals = stats.getTotals();
int[] leftEnds = stats.getEndpoints(); int[] leftEnds = stats.getEndpoints();
for ( Map.Entry<String, int[]> p : histograms.entrySet() ) { for ( Map.Entry<String, long[]> p : histograms.entrySet() ) {
String s = p.getKey(); String s = p.getKey();
int[] histogram = p.getValue(); long[] histogram = p.getValue();
int median = getQuantile(histogram,0.5); int median = getQuantile(histogram,0.5);
int q1 = getQuantile(histogram,0.25); int q1 = getQuantile(histogram,0.25);
int q3 = getQuantile(histogram,0.75); int q3 = getQuantile(histogram,0.75);
@ -728,7 +728,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
} }
} }
private int getQuantile(int[] histogram, double prop) { private int getQuantile(long[] histogram, double prop) {
int total = 0; int total = 0;
for ( int i = 0; i < histogram.length; i ++ ) { for ( int i = 0; i < histogram.length; i ++ ) {
@ -745,7 +745,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
return bin == -1 ? 0 : bin; return bin == -1 ? 0 : bin;
} }
private double getPctBasesAbove(int[] histogram, int bin) { private double getPctBasesAbove(long[] histogram, int bin) {
long below = 0l; long below = 0l;
long above = 0l; long above = 0l;
for ( int index = 0; index < histogram.length; index++) { for ( int index = 0; index < histogram.length; index++) {