CoverageHistogram now stores its coverage histogram in an ExpandingArrayList, so there is no longer a hard limit on the maximum depth.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1643 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4ad46590a3
commit
73bec6f36d
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.By;
|
import org.broadinstitute.sting.gatk.walkers.By;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.utils.ExpandingArrayList;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -19,29 +20,37 @@ public class CoverageHistogram extends LocusWalker<Integer,Integer>
|
||||||
//@Argument(fullName="start", shortName="start", required=false, doc="start") public Integer START = 0;
|
//@Argument(fullName="start", shortName="start", required=false, doc="start") public Integer START = 0;
|
||||||
|
|
||||||
// Private state.
|
// Private state.
|
||||||
long[] coverage_hist;
|
|
||||||
int max_depth;
|
|
||||||
|
|
||||||
long sum_coverage;
|
//long[] coverage_hist;
|
||||||
long num_sites;
|
ExpandingArrayList<Integer> coverage_hist = new ExpandingArrayList<Integer>();
|
||||||
|
int max_depth = 0;
|
||||||
|
|
||||||
|
long sum_coverage = 0;
|
||||||
|
long num_sites = 0;
|
||||||
|
|
||||||
/////////
|
/////////
|
||||||
// Walker Interface Functions
|
// Walker Interface Functions
|
||||||
public void initialize()
|
public void initialize()
|
||||||
{
|
{
|
||||||
coverage_hist = new long[1000000];
|
//coverage_hist = new long[1000000];
|
||||||
Arrays.fill(coverage_hist, 0);
|
//Arrays.fill(coverage_hist, 0);
|
||||||
max_depth = 0;
|
|
||||||
|
|
||||||
sum_coverage = 0;
|
|
||||||
num_sites = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void incCov(int depth) {
|
||||||
|
int c = coverage_hist.expandingGet(depth, 0);
|
||||||
|
coverage_hist.set(depth, c + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getCov(int depth) {
|
||||||
|
return coverage_hist.get(depth);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
|
||||||
{
|
{
|
||||||
if (ref.getBase() == 'N') { return null; }
|
if (ref.getBase() == 'N') { return null; }
|
||||||
int depth = context.getReads().size();
|
int depth = context.getReads().size();
|
||||||
coverage_hist[depth] += 1;
|
incCov(depth);
|
||||||
if (depth > max_depth) { max_depth = depth; }
|
if (depth > max_depth) { max_depth = depth; }
|
||||||
|
|
||||||
sum_coverage += depth;
|
sum_coverage += depth;
|
||||||
|
|
@ -53,40 +62,40 @@ public class CoverageHistogram extends LocusWalker<Integer,Integer>
|
||||||
public void onTraversalDone(Integer sum)
|
public void onTraversalDone(Integer sum)
|
||||||
{
|
{
|
||||||
double mean_coverage = (double)sum_coverage / (double)num_sites;
|
double mean_coverage = (double)sum_coverage / (double)num_sites;
|
||||||
double mean_good_coverage = (double)sum_coverage / ((double)(num_sites - coverage_hist[0]));
|
double mean_good_coverage = (double)sum_coverage / ((double)(num_sites - getCov(0)));
|
||||||
out.printf("# all_sites : mean:%f num_sites:%d%n", mean_coverage, num_sites);
|
out.printf("# all_sites : mean:%f num_sites:%d%n", mean_coverage, num_sites);
|
||||||
out.printf("# sites with at least 1 read : mean:%f num_sites:%d%n", mean_good_coverage, num_sites - coverage_hist[0]);
|
out.printf("# sites with at least 1 read : mean:%f num_sites:%d%n", mean_good_coverage, num_sites - getCov(0));
|
||||||
|
|
||||||
// Code for calculting std devs adapted from Michael Melgar's python script
|
// Code for calculting std devs adapted from Michael Melgar's python script
|
||||||
|
|
||||||
// Find the maximum extent of 'good' data
|
// Find the maximum extent of 'good' data
|
||||||
// First, find the mode
|
// First, find the mode
|
||||||
long maxValue = coverage_hist[1]; // ignore doc=0
|
long maxValue = getCov(1); // ignore doc=0
|
||||||
int mode = 1;
|
int mode = 1;
|
||||||
for (int i = 2; i <= max_depth; i++) {
|
for (int i = 2; i <= max_depth; i++) {
|
||||||
if (coverage_hist[i] > maxValue) {
|
if (getCov(i) > maxValue) {
|
||||||
maxValue = coverage_hist[i];
|
maxValue = getCov(i);
|
||||||
mode = i;
|
mode = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// now, procede to find end of good Gaussian fit
|
// now, procede to find end of good Gaussian fit
|
||||||
long dist = (long)Math.pow(10, 9);
|
long dist = (long)Math.pow(10, 9);
|
||||||
while ( Math.abs(coverage_hist[mode] - coverage_hist[1]) < dist )
|
while ( Math.abs(getCov(mode) - getCov(1)) < dist )
|
||||||
dist = Math.abs(coverage_hist[mode++] - coverage_hist[1]);
|
dist = Math.abs(getCov(mode++) - getCov(1));
|
||||||
int maxGoodDepth = mode + 1;
|
int maxGoodDepth = mode + 1;
|
||||||
|
|
||||||
// calculate the mean of the good region
|
// calculate the mean of the good region
|
||||||
long totalGoodSites = 0, totalGoodDepth = 0;
|
long totalGoodSites = 0, totalGoodDepth = 0;
|
||||||
for (int i = 1; i <= maxGoodDepth; i++) { // ignore doc=0
|
for (int i = 1; i <= maxGoodDepth; i++) { // ignore doc=0
|
||||||
totalGoodSites += coverage_hist[i];
|
totalGoodSites += getCov(i);
|
||||||
totalGoodDepth += i * coverage_hist[i];
|
totalGoodDepth += i * getCov(i);
|
||||||
}
|
}
|
||||||
double meanGoodDepth = (double)totalGoodDepth / (double)totalGoodSites;
|
double meanGoodDepth = (double)totalGoodDepth / (double)totalGoodSites;
|
||||||
|
|
||||||
// calculate the variance and standard deviation of the good region
|
// calculate the variance and standard deviation of the good region
|
||||||
double var = 0.0;
|
double var = 0.0;
|
||||||
for (int i = 1; i <= maxGoodDepth; i++) { // ignore doc=0
|
for (int i = 1; i <= maxGoodDepth; i++) { // ignore doc=0
|
||||||
var += coverage_hist[i] * Math.pow(meanGoodDepth - (double)i, 2);
|
var += getCov(i) * Math.pow(meanGoodDepth - (double)i, 2);
|
||||||
}
|
}
|
||||||
double stdev = Math.sqrt(var / (double)totalGoodSites);
|
double stdev = Math.sqrt(var / (double)totalGoodSites);
|
||||||
out.printf("# sites within Gaussian fit : mean:%f num_sites:%d std_dev:%f%n", meanGoodDepth, totalGoodSites, stdev);
|
out.printf("# sites within Gaussian fit : mean:%f num_sites:%d std_dev:%f%n", meanGoodDepth, totalGoodSites, stdev);
|
||||||
|
|
@ -97,9 +106,9 @@ public class CoverageHistogram extends LocusWalker<Integer,Integer>
|
||||||
out.println("\ndepth count freq(percent)");
|
out.println("\ndepth count freq(percent)");
|
||||||
for (int i = 0; i <= max_depth; i++)
|
for (int i = 0; i <= max_depth; i++)
|
||||||
{
|
{
|
||||||
out.printf("%d %d %f\n", i, coverage_hist[i], (100.0*coverage_hist[i]) / (double)num_sites);
|
out.printf("%d %d %f\n", i, getCov(i), (100.0*getCov(i)) / (double)num_sites);
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer reduceInit()
|
public Integer reduceInit()
|
||||||
|
|
|
||||||
|
|
@ -22,13 +22,22 @@ public class ExpandingArrayList<E> extends ArrayList<E> {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public E set(int index, E element) {
|
public E expandingGet(int index, E default_value) throws IndexOutOfBoundsException {
|
||||||
|
maybeExpand(index, default_value);
|
||||||
|
return super.get(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void maybeExpand(int index, E value) {
|
||||||
if ( index >= size() ) {
|
if ( index >= size() ) {
|
||||||
// We need to add null items until we can safely set index to element
|
// We need to append filler items (the supplied value) until index is a valid position in the list
|
||||||
for ( int i = size(); i <= index; i++ )
|
for ( int i = size(); i <= index; i++ )
|
||||||
add(null);
|
add(value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public E set(int index, E element) {
|
||||||
|
maybeExpand(index, null);
|
||||||
return super.set(index, element);
|
return super.set(index, element);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue