Depth of coverage now uses longs rather than ints. We can now successfully run on the Lepidosiren paradoxa genome (about 80 GB).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3859 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
536399eaa0
commit
973934f769
|
|
@ -23,10 +23,10 @@ public class DepthOfCoverageStats {
|
||||||
// STANDARD DATA
|
// STANDARD DATA
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
private Map<String,int[]> granularHistogramBySample; // holds the counts per each bin
|
private Map<String,long[]> granularHistogramBySample; // holds the counts per each bin
|
||||||
private Map<String,Long> totalCoverages; // holds total coverage per sample
|
private Map<String,Long> totalCoverages; // holds total coverage per sample
|
||||||
private int[] binLeftEndpoints; // describes the left endpoint for each bin
|
private int[] binLeftEndpoints; // describes the left endpoint for each bin
|
||||||
private int[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
|
private long[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
|
||||||
private boolean tabulateLocusCounts = false;
|
private boolean tabulateLocusCounts = false;
|
||||||
private long nLoci; // number of loci seen
|
private long nLoci; // number of loci seen
|
||||||
private long totalDepthOfCoverage;
|
private long totalDepthOfCoverage;
|
||||||
|
|
@ -77,7 +77,7 @@ public class DepthOfCoverageStats {
|
||||||
|
|
||||||
public DepthOfCoverageStats(int[] leftEndpoints) {
|
public DepthOfCoverageStats(int[] leftEndpoints) {
|
||||||
this.binLeftEndpoints = leftEndpoints;
|
this.binLeftEndpoints = leftEndpoints;
|
||||||
granularHistogramBySample = new HashMap<String,int[]>();
|
granularHistogramBySample = new HashMap<String,long[]>();
|
||||||
totalCoverages = new HashMap<String,Long>();
|
totalCoverages = new HashMap<String,Long>();
|
||||||
nLoci = 0;
|
nLoci = 0;
|
||||||
totalLocusDepth = 0;
|
totalLocusDepth = 0;
|
||||||
|
|
@ -86,10 +86,10 @@ public class DepthOfCoverageStats {
|
||||||
|
|
||||||
public DepthOfCoverageStats(DepthOfCoverageStats cloneMe) {
|
public DepthOfCoverageStats(DepthOfCoverageStats cloneMe) {
|
||||||
this.binLeftEndpoints = cloneMe.binLeftEndpoints;
|
this.binLeftEndpoints = cloneMe.binLeftEndpoints;
|
||||||
granularHistogramBySample = new HashMap<String,int[]>();
|
granularHistogramBySample = new HashMap<String,long[]>();
|
||||||
totalCoverages = new HashMap<String,Long>();
|
totalCoverages = new HashMap<String,Long>();
|
||||||
for ( String s : cloneMe.getAllSamples() ) {
|
for ( String s : cloneMe.getAllSamples() ) {
|
||||||
granularHistogramBySample.put(s,new int[cloneMe.getHistograms().get(s).length]);
|
granularHistogramBySample.put(s,new long[cloneMe.getHistograms().get(s).length]);
|
||||||
for ( int i = 0; i < granularHistogramBySample.get(s).length; i++ ) {
|
for ( int i = 0; i < granularHistogramBySample.get(s).length; i++ ) {
|
||||||
granularHistogramBySample.get(s)[i] = cloneMe.getHistograms().get(s)[i];
|
granularHistogramBySample.get(s)[i] = cloneMe.getHistograms().get(s)[i];
|
||||||
}
|
}
|
||||||
|
|
@ -98,7 +98,7 @@ public class DepthOfCoverageStats {
|
||||||
|
|
||||||
this.includeDeletions = cloneMe.includeDeletions;
|
this.includeDeletions = cloneMe.includeDeletions;
|
||||||
if ( cloneMe.tabulateLocusCounts ) {
|
if ( cloneMe.tabulateLocusCounts ) {
|
||||||
this.locusCoverageCounts = new int[cloneMe.locusCoverageCounts.length][cloneMe.locusCoverageCounts[0].length];
|
this.locusCoverageCounts = new long[cloneMe.locusCoverageCounts.length][cloneMe.locusCoverageCounts[0].length];
|
||||||
}
|
}
|
||||||
//this.granularHistogramBySample = cloneMe.granularHistogramBySample;
|
//this.granularHistogramBySample = cloneMe.granularHistogramBySample;
|
||||||
//this.totalCoverages = cloneMe.totalCoverages;
|
//this.totalCoverages = cloneMe.totalCoverages;
|
||||||
|
|
@ -112,7 +112,7 @@ public class DepthOfCoverageStats {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] binCounts = new int[this.binLeftEndpoints.length+1];
|
long[] binCounts = new long[this.binLeftEndpoints.length+1];
|
||||||
for ( int b = 0; b < binCounts.length; b ++ ) {
|
for ( int b = 0; b < binCounts.length; b ++ ) {
|
||||||
binCounts[b] = 0;
|
binCounts[b] = 0;
|
||||||
}
|
}
|
||||||
|
|
@ -122,7 +122,7 @@ public class DepthOfCoverageStats {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initializeLocusCounts() {
|
public void initializeLocusCounts() {
|
||||||
locusCoverageCounts = new int[granularHistogramBySample.size()][binLeftEndpoints.length+1];
|
locusCoverageCounts = new long[granularHistogramBySample.size()][binLeftEndpoints.length+1];
|
||||||
locusHistogram = new int[binLeftEndpoints.length+1];
|
locusHistogram = new int[binLeftEndpoints.length+1];
|
||||||
for ( int b = 0; b < binLeftEndpoints.length+1; b ++ ) {
|
for ( int b = 0; b < binLeftEndpoints.length+1; b ++ ) {
|
||||||
for ( int a = 0; a < granularHistogramBySample.size(); a ++ ) {
|
for ( int a = 0; a < granularHistogramBySample.size(); a ++ ) {
|
||||||
|
|
@ -190,7 +190,7 @@ public class DepthOfCoverageStats {
|
||||||
private int updateSample(String sample, int depth) {
|
private int updateSample(String sample, int depth) {
|
||||||
totalCoverages.put(sample,totalCoverages.get(sample)+depth);
|
totalCoverages.put(sample,totalCoverages.get(sample)+depth);
|
||||||
|
|
||||||
int[] granularBins = granularHistogramBySample.get(sample);
|
long[] granularBins = granularHistogramBySample.get(sample);
|
||||||
for ( int b = 0; b < binLeftEndpoints.length; b ++ ) {
|
for ( int b = 0; b < binLeftEndpoints.length; b ++ ) {
|
||||||
if ( depth < binLeftEndpoints[b] ) {
|
if ( depth < binLeftEndpoints[b] ) {
|
||||||
granularBins[b]++;
|
granularBins[b]++;
|
||||||
|
|
@ -212,11 +212,11 @@ public class DepthOfCoverageStats {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mergeSamples(DepthOfCoverageStats otherStats) {
|
private void mergeSamples(DepthOfCoverageStats otherStats) {
|
||||||
Map<String,int[]> otherHistogram = otherStats.getHistograms();
|
Map<String,long[]> otherHistogram = otherStats.getHistograms();
|
||||||
Map<String,Double> otherMeans = otherStats.getMeans();
|
Map<String,Double> otherMeans = otherStats.getMeans();
|
||||||
for ( String s : this.getAllSamples() ) {
|
for ( String s : this.getAllSamples() ) {
|
||||||
int[] internalCounts = granularHistogramBySample.get(s);
|
long[] internalCounts = granularHistogramBySample.get(s);
|
||||||
int[] externalCounts = otherHistogram.get(s);
|
long[] externalCounts = otherHistogram.get(s);
|
||||||
for ( int b = 0; b < internalCounts.length; b++ ) {
|
for ( int b = 0; b < internalCounts.length; b++ ) {
|
||||||
internalCounts[b] += externalCounts[b];
|
internalCounts[b] += externalCounts[b];
|
||||||
}
|
}
|
||||||
|
|
@ -225,7 +225,7 @@ public class DepthOfCoverageStats {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mergeLocusCounts( int[][] otherCounts ) {
|
private void mergeLocusCounts( long[][] otherCounts ) {
|
||||||
for ( int a = 0; a < locusCoverageCounts.length; a ++ ) {
|
for ( int a = 0; a < locusCoverageCounts.length; a ++ ) {
|
||||||
for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) {
|
for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) {
|
||||||
locusCoverageCounts[a][b] += otherCounts[a][b];
|
locusCoverageCounts[a][b] += otherCounts[a][b];
|
||||||
|
|
@ -260,11 +260,11 @@ public class DepthOfCoverageStats {
|
||||||
// ACCESSOR METHODS
|
// ACCESSOR METHODS
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
public Map<String,int[]> getHistograms() {
|
public Map<String,long[]> getHistograms() {
|
||||||
return granularHistogramBySample;
|
return granularHistogramBySample;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int[][] getLocusCounts() {
|
public long[][] getLocusCounts() {
|
||||||
return locusCoverageCounts;
|
return locusCoverageCounts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -302,7 +302,7 @@ public class DepthOfCoverageStats {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] getCoverageProportions(String sample) {
|
public double[] getCoverageProportions(String sample) {
|
||||||
int[] hist = granularHistogramBySample.get(sample);
|
long[] hist = granularHistogramBySample.get(sample);
|
||||||
double[] distribution = new double[hist.length];
|
double[] distribution = new double[hist.length];
|
||||||
long count = 0;
|
long count = 0;
|
||||||
for ( int i = hist.length-1; i >= 0; i -- ) {
|
for ( int i = hist.length-1; i >= 0; i -- ) {
|
||||||
|
|
|
||||||
|
|
@ -602,12 +602,12 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
||||||
hBuilder.append(String.format("from_%d_to_%d)%s",leftEnds[i-1],leftEnds[i],separator));
|
hBuilder.append(String.format("from_%d_to_%d)%s",leftEnds[i-1],leftEnds[i],separator));
|
||||||
hBuilder.append(String.format("from_%d_to_inf%n",leftEnds[leftEnds.length-1]));
|
hBuilder.append(String.format("from_%d_to_inf%n",leftEnds[leftEnds.length-1]));
|
||||||
output.print(hBuilder.toString());
|
output.print(hBuilder.toString());
|
||||||
Map<String,int[]> histograms = stats.getHistograms();
|
Map<String,long[]> histograms = stats.getHistograms();
|
||||||
|
|
||||||
for ( Map.Entry<String, int[]> p : histograms.entrySet() ) {
|
for ( Map.Entry<String, long[]> p : histograms.entrySet() ) {
|
||||||
StringBuilder sBuilder = new StringBuilder();
|
StringBuilder sBuilder = new StringBuilder();
|
||||||
sBuilder.append(String.format("sample_%s",p.getKey()));
|
sBuilder.append(String.format("sample_%s",p.getKey()));
|
||||||
for ( int count : p.getValue() ) {
|
for ( long count : p.getValue() ) {
|
||||||
sBuilder.append(String.format("%s%d",separator,count));
|
sBuilder.append(String.format("%s%d",separator,count));
|
||||||
}
|
}
|
||||||
sBuilder.append(String.format("%n"));
|
sBuilder.append(String.format("%n"));
|
||||||
|
|
@ -625,7 +625,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
||||||
int[] endpoints = stats.getEndpoints();
|
int[] endpoints = stats.getEndpoints();
|
||||||
int samples = stats.getHistograms().size();
|
int samples = stats.getHistograms().size();
|
||||||
|
|
||||||
int[][] baseCoverageCumDist = stats.getLocusCounts();
|
long[][] baseCoverageCumDist = stats.getLocusCounts();
|
||||||
|
|
||||||
// rows - # of samples
|
// rows - # of samples
|
||||||
// columns - depth of coverage
|
// columns - depth of coverage
|
||||||
|
|
@ -693,14 +693,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
||||||
|
|
||||||
output.printf("%n");
|
output.printf("%n");
|
||||||
|
|
||||||
Map<String,int[]> histograms = stats.getHistograms();
|
Map<String,long[]> histograms = stats.getHistograms();
|
||||||
Map<String,Double> means = stats.getMeans();
|
Map<String,Double> means = stats.getMeans();
|
||||||
Map<String,Long> totals = stats.getTotals();
|
Map<String,Long> totals = stats.getTotals();
|
||||||
int[] leftEnds = stats.getEndpoints();
|
int[] leftEnds = stats.getEndpoints();
|
||||||
|
|
||||||
for ( Map.Entry<String, int[]> p : histograms.entrySet() ) {
|
for ( Map.Entry<String, long[]> p : histograms.entrySet() ) {
|
||||||
String s = p.getKey();
|
String s = p.getKey();
|
||||||
int[] histogram = p.getValue();
|
long[] histogram = p.getValue();
|
||||||
int median = getQuantile(histogram,0.5);
|
int median = getQuantile(histogram,0.5);
|
||||||
int q1 = getQuantile(histogram,0.25);
|
int q1 = getQuantile(histogram,0.25);
|
||||||
int q3 = getQuantile(histogram,0.75);
|
int q3 = getQuantile(histogram,0.75);
|
||||||
|
|
@ -728,7 +728,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getQuantile(int[] histogram, double prop) {
|
private int getQuantile(long[] histogram, double prop) {
|
||||||
int total = 0;
|
int total = 0;
|
||||||
|
|
||||||
for ( int i = 0; i < histogram.length; i ++ ) {
|
for ( int i = 0; i < histogram.length; i ++ ) {
|
||||||
|
|
@ -745,7 +745,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
||||||
return bin == -1 ? 0 : bin;
|
return bin == -1 ? 0 : bin;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double getPctBasesAbove(int[] histogram, int bin) {
|
private double getPctBasesAbove(long[] histogram, int bin) {
|
||||||
long below = 0l;
|
long below = 0l;
|
||||||
long above = 0l;
|
long above = 0l;
|
||||||
for ( int index = 0; index < histogram.length; index++) {
|
for ( int index = 0; index < histogram.length; index++) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue