Changes to DepthOfCoverage (JIRA items) and added back an integration test to cover it. Alterations to the design file generator to output all transcripts (rather than choosing one at random).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3366 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4235164359
commit
b7d21627ab
|
|
@ -77,6 +77,10 @@ public class rodRefSeq extends BasicReferenceOrderedDatum implements Transcript
|
||||||
return numbers;
|
return numbers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getTranscriptUniqueGeneName() {
|
||||||
|
return String.format("%s(%s)",getGeneName(),getTranscriptId());
|
||||||
|
}
|
||||||
|
|
||||||
public String getOverlapString(GenomeLoc position) {
|
public String getOverlapString(GenomeLoc position) {
|
||||||
boolean is_exon = false;
|
boolean is_exon = false;
|
||||||
StringBuilder overlapString = new StringBuilder();
|
StringBuilder overlapString = new StringBuilder();
|
||||||
|
|
|
||||||
|
|
@ -61,13 +61,6 @@ import java.util.*;
|
||||||
* @Author chartl
|
* @Author chartl
|
||||||
* @Date Feb 22, 2010
|
* @Date Feb 22, 2010
|
||||||
*/
|
*/
|
||||||
// todo [DONE] -- add per target (e.g. regional) aggregation
|
|
||||||
// todo [DONE] -- add ability to print out the calculated bins and quit (for pre-analysis bin size selection)
|
|
||||||
// todo [DONE] -- refactor the location of the ALL_SAMPLE metrics [keep out of the per-sample HashMaps]
|
|
||||||
// todo [DONE] -- per locus output through -o
|
|
||||||
// todo [DONE] -- support for using read groups instead of samples
|
|
||||||
// todo [DONE] -- coverage including deletions
|
|
||||||
// todo [DONE] -- base counts
|
|
||||||
// todo -- cache the map from sample names to means in the print functions, rather than regenerating each time
|
// todo -- cache the map from sample names to means in the print functions, rather than regenerating each time
|
||||||
// todo -- support for granular histograms for total depth; maybe n*[start,stop], bins*sqrt(n)
|
// todo -- support for granular histograms for total depth; maybe n*[start,stop], bins*sqrt(n)
|
||||||
// todo -- alter logarithmic scaling to spread out bins more
|
// todo -- alter logarithmic scaling to spread out bins more
|
||||||
|
|
@ -81,9 +74,9 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
int stop = 500;
|
int stop = 500;
|
||||||
@Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false)
|
@Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false)
|
||||||
int nBins = 499;
|
int nBins = 499;
|
||||||
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to 50.", required = false)
|
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false)
|
||||||
byte minMappingQuality = -1;
|
byte minMappingQuality = -1;
|
||||||
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to 20.", required = false)
|
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to -1.", required = false)
|
||||||
byte minBaseQuality = -1;
|
byte minBaseQuality = -1;
|
||||||
@Argument(fullName = "printBaseCounts", shortName = "baseCounts", doc = "Will add base counts to per-locus output.", required = false)
|
@Argument(fullName = "printBaseCounts", shortName = "baseCounts", doc = "Will add base counts to per-locus output.", required = false)
|
||||||
boolean printBaseCounts = false;
|
boolean printBaseCounts = false;
|
||||||
|
|
@ -107,6 +100,11 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
boolean ignoreDeletionSites = false;
|
boolean ignoreDeletionSites = false;
|
||||||
@Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false)
|
@Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false)
|
||||||
File refSeqGeneList = null;
|
File refSeqGeneList = null;
|
||||||
|
@Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false)
|
||||||
|
String outputFormat = "rtable";
|
||||||
|
|
||||||
|
String[] OUTPUT_FORMATS = {"table","rtable","csv"};
|
||||||
|
String separator = "\t";
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
// STANDARD WALKER METHODS
|
// STANDARD WALKER METHODS
|
||||||
|
|
@ -126,6 +124,20 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean goodOutputFormat = false;
|
||||||
|
for ( String f : OUTPUT_FORMATS ) {
|
||||||
|
goodOutputFormat = goodOutputFormat || f.equals(outputFormat);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! goodOutputFormat ) {
|
||||||
|
System.out.println("Improper output format. Can be one of table,rtable,csv. Was "+outputFormat);
|
||||||
|
System.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( outputFormat.equals("csv") ) {
|
||||||
|
separator = ",";
|
||||||
|
}
|
||||||
|
|
||||||
if ( getToolkit().getArguments().outFileName == null ) {
|
if ( getToolkit().getArguments().outFileName == null ) {
|
||||||
logger.warn("This walker creates many output files from one input file; you may wish to specify an input file rather "+
|
logger.warn("This walker creates many output files from one input file; you may wish to specify an input file rather "+
|
||||||
"than defaulting all output to stdout.");
|
"than defaulting all output to stdout.");
|
||||||
|
|
@ -282,30 +294,31 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
throw new StingException("Unable to open interval file on reduce", e);
|
throw new StingException("Unable to open interval file on reduce", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Pair<GenomeLoc,CoverageAggregator> firstPair = statsByInterval.get(0);
|
Pair<GenomeLoc,CoverageAggregator> firstPair = statsByInterval.get(0);
|
||||||
CoverageAggregator firstAggregator = firstPair.second;
|
CoverageAggregator firstAggregator = firstPair.second;
|
||||||
DepthOfCoverageStats firstStats = isSample ? firstAggregator.getCoverageBySample() : firstAggregator.getCoverageByReadGroup();
|
DepthOfCoverageStats firstStats = isSample ? firstAggregator.getCoverageBySample() : firstAggregator.getCoverageByReadGroup();
|
||||||
|
|
||||||
StringBuilder summaryHeader = new StringBuilder();
|
StringBuilder summaryHeader = new StringBuilder();
|
||||||
summaryHeader.append("Target");
|
summaryHeader.append("Target");
|
||||||
summaryHeader.append("\ttotal_coverage");
|
summaryHeader.append(separator);
|
||||||
summaryHeader.append("\taverage_coverage");
|
summaryHeader.append("total_coverage");
|
||||||
|
summaryHeader.append(separator);
|
||||||
|
summaryHeader.append("average_coverage");
|
||||||
|
|
||||||
for ( String s : firstStats.getAllSamples() ) {
|
for ( String s : firstStats.getAllSamples() ) {
|
||||||
summaryHeader.append("\t");
|
summaryHeader.append(separator);
|
||||||
summaryHeader.append(s);
|
summaryHeader.append(s);
|
||||||
summaryHeader.append("_total_cvg");
|
summaryHeader.append("_total_cvg");
|
||||||
summaryHeader.append("\t");
|
summaryHeader.append(separator);
|
||||||
summaryHeader.append(s);
|
summaryHeader.append(s);
|
||||||
summaryHeader.append("_mean_cvg");
|
summaryHeader.append("_mean_cvg");
|
||||||
summaryHeader.append("\t");
|
summaryHeader.append(separator);
|
||||||
summaryHeader.append(s);
|
summaryHeader.append(s);
|
||||||
summaryHeader.append("_granular_Q1");
|
summaryHeader.append("_granular_Q1");
|
||||||
summaryHeader.append("\t");
|
summaryHeader.append(separator);
|
||||||
summaryHeader.append(s);
|
summaryHeader.append(s);
|
||||||
summaryHeader.append("_granular_median");
|
summaryHeader.append("_granular_median");
|
||||||
summaryHeader.append("\t");
|
summaryHeader.append(separator);
|
||||||
summaryHeader.append(s);
|
summaryHeader.append(s);
|
||||||
summaryHeader.append("_granular_Q3");
|
summaryHeader.append("_granular_Q3");
|
||||||
}
|
}
|
||||||
|
|
@ -386,26 +399,27 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
private void printTargetSummary(PrintStream output, Pair<?,DepthOfCoverageStats> intervalStats) {
|
private void printTargetSummary(PrintStream output, Pair<?,DepthOfCoverageStats> intervalStats) {
|
||||||
DepthOfCoverageStats stats = intervalStats.second;
|
DepthOfCoverageStats stats = intervalStats.second;
|
||||||
int[] bins = stats.getEndpoints();
|
int[] bins = stats.getEndpoints();
|
||||||
|
|
||||||
StringBuilder targetSummary = new StringBuilder();
|
StringBuilder targetSummary = new StringBuilder();
|
||||||
targetSummary.append(intervalStats.first.toString());
|
targetSummary.append(intervalStats.first.toString());
|
||||||
targetSummary.append("\t");
|
targetSummary.append(separator);
|
||||||
targetSummary.append(stats.getTotalCoverage());
|
targetSummary.append(stats.getTotalCoverage());
|
||||||
targetSummary.append("\t");
|
targetSummary.append(separator);
|
||||||
targetSummary.append(String.format("%.2f",stats.getTotalMeanCoverage()));
|
targetSummary.append(String.format("%.2f",stats.getTotalMeanCoverage()));
|
||||||
|
|
||||||
for ( String s : stats.getAllSamples() ) {
|
for ( String s : stats.getAllSamples() ) {
|
||||||
targetSummary.append("\t");
|
targetSummary.append(separator);
|
||||||
targetSummary.append(stats.getTotals().get(s));
|
targetSummary.append(stats.getTotals().get(s));
|
||||||
targetSummary.append("\t");
|
targetSummary.append(separator);
|
||||||
targetSummary.append(String.format("%.2f", stats.getMeans().get(s)));
|
targetSummary.append(String.format("%.2f", stats.getMeans().get(s)));
|
||||||
targetSummary.append("\t");
|
targetSummary.append(separator);
|
||||||
int median = getQuantile(stats.getHistograms().get(s),0.5);
|
int median = getQuantile(stats.getHistograms().get(s),0.5);
|
||||||
int q1 = getQuantile(stats.getHistograms().get(s),0.25);
|
int q1 = getQuantile(stats.getHistograms().get(s),0.25);
|
||||||
int q3 = getQuantile(stats.getHistograms().get(s),0.75);
|
int q3 = getQuantile(stats.getHistograms().get(s),0.75);
|
||||||
targetSummary.append(formatBin(bins,q1));
|
targetSummary.append(formatBin(bins,q1));
|
||||||
targetSummary.append("\t");
|
targetSummary.append(separator);
|
||||||
targetSummary.append(formatBin(bins,median));
|
targetSummary.append(formatBin(bins,median));
|
||||||
targetSummary.append("\t");
|
targetSummary.append(separator);
|
||||||
targetSummary.append(formatBin(bins,q3));
|
targetSummary.append(formatBin(bins,q3));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -424,16 +438,17 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
}
|
}
|
||||||
|
|
||||||
private void printIntervalTable(PrintStream output, int[][] intervalTable, int[] cutoffs) {
|
private void printIntervalTable(PrintStream output, int[][] intervalTable, int[] cutoffs) {
|
||||||
output.printf("\tdepth>=%d",0);
|
String colHeader = outputFormat.equals("rtable") ? "" : "Number_of_sources";
|
||||||
|
output.printf(colHeader + separator+"depth>=%d",0);
|
||||||
for ( int col = 0; col < intervalTable[0].length-1; col ++ ) {
|
for ( int col = 0; col < intervalTable[0].length-1; col ++ ) {
|
||||||
output.printf("\tdepth>=%d",cutoffs[col]);
|
output.printf(separator+"depth>=%d",cutoffs[col]);
|
||||||
}
|
}
|
||||||
|
|
||||||
output.printf(String.format("%n"));
|
output.printf(String.format("%n"));
|
||||||
for ( int row = 0; row < intervalTable.length; row ++ ) {
|
for ( int row = 0; row < intervalTable.length; row ++ ) {
|
||||||
output.printf("At_least_%d_samples",row+1);
|
output.printf("At_least_%d_samples",row+1);
|
||||||
for ( int col = 0; col < intervalTable[0].length; col++ ) {
|
for ( int col = 0; col < intervalTable[0].length; col++ ) {
|
||||||
output.printf("\t%d",intervalTable[row][col]);
|
output.printf(separator+"%d",intervalTable[row][col]);
|
||||||
}
|
}
|
||||||
output.printf(String.format("%n"));
|
output.printf(String.format("%n"));
|
||||||
}
|
}
|
||||||
|
|
@ -503,13 +518,13 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
if ( ! useReadGroup || useBoth ) {
|
if ( ! useReadGroup || useBoth ) {
|
||||||
File perLocusStatisticsFile = deriveFromStream("sample_locus_statistics");
|
File perLocusStatisticsFile = deriveFromStream("sample_locus_statistics");
|
||||||
File perLocusCoverageFile = deriveFromStream("sample_coverage_statistics");
|
File perLocusCoverageFile = deriveFromStream("sample_coverage_statistics");
|
||||||
printPerLocus(perLocusStatisticsFile,perLocusCoverageFile,coverageProfiles.getCoverageBySample());
|
printPerLocus(perLocusStatisticsFile,perLocusCoverageFile,coverageProfiles.getCoverageBySample(),false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( useReadGroup || useBoth ) {
|
if ( useReadGroup || useBoth ) {
|
||||||
File perLocusRGStats = deriveFromStream("read_group_locus_statistics");
|
File perLocusRGStats = deriveFromStream("read_group_locus_statistics");
|
||||||
File perLocusRGCoverage = deriveFromStream("read_group_locus_coverage");
|
File perLocusRGCoverage = deriveFromStream("read_group_locus_coverage");
|
||||||
printPerLocus(perLocusRGStats,perLocusRGCoverage,coverageProfiles.getCoverageByReadGroup());
|
printPerLocus(perLocusRGStats,perLocusRGCoverage,coverageProfiles.getCoverageByReadGroup(),true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -530,11 +545,15 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
private void printPerSample(PrintStream out, File optionalFile, DepthOfCoverageStats stats) {
|
private void printPerSample(PrintStream out, File optionalFile, DepthOfCoverageStats stats) {
|
||||||
PrintStream output = getCorrectStream(out,optionalFile);
|
PrintStream output = getCorrectStream(out,optionalFile);
|
||||||
int[] leftEnds = stats.getEndpoints();
|
int[] leftEnds = stats.getEndpoints();
|
||||||
|
|
||||||
StringBuilder hBuilder = new StringBuilder();
|
StringBuilder hBuilder = new StringBuilder();
|
||||||
hBuilder.append("\t");
|
if ( ! outputFormat.equals("rTable")) {
|
||||||
hBuilder.append(String.format("from_0_to_%d)\t",leftEnds[0]));
|
hBuilder.append("Source_of_reads");
|
||||||
|
}
|
||||||
|
hBuilder.append(separator);
|
||||||
|
hBuilder.append(String.format("from_0_to_%d)%s",leftEnds[0],separator));
|
||||||
for ( int i = 1; i < leftEnds.length; i++ )
|
for ( int i = 1; i < leftEnds.length; i++ )
|
||||||
hBuilder.append(String.format("from_%d_to_%d)\t",leftEnds[i-1],leftEnds[i]));
|
hBuilder.append(String.format("from_%d_to_%d)%s",leftEnds[i-1],leftEnds[i],separator));
|
||||||
hBuilder.append(String.format("from_%d_to_inf%n",leftEnds[leftEnds.length-1]));
|
hBuilder.append(String.format("from_%d_to_inf%n",leftEnds[leftEnds.length-1]));
|
||||||
output.print(hBuilder.toString());
|
output.print(hBuilder.toString());
|
||||||
Map<String,int[]> histograms = stats.getHistograms();
|
Map<String,int[]> histograms = stats.getHistograms();
|
||||||
|
|
@ -542,14 +561,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
StringBuilder sBuilder = new StringBuilder();
|
StringBuilder sBuilder = new StringBuilder();
|
||||||
sBuilder.append(String.format("sample_%s",s));
|
sBuilder.append(String.format("sample_%s",s));
|
||||||
for ( int count : histograms.get(s) ) {
|
for ( int count : histograms.get(s) ) {
|
||||||
sBuilder.append(String.format("\t%d",count));
|
sBuilder.append(String.format("%s%d",separator,count));
|
||||||
}
|
}
|
||||||
sBuilder.append(String.format("%n"));
|
sBuilder.append(String.format("%n"));
|
||||||
output.print(sBuilder.toString());
|
output.print(sBuilder.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void printPerLocus(File locusFile, File coverageFile, DepthOfCoverageStats stats) {
|
private void printPerLocus(File locusFile, File coverageFile, DepthOfCoverageStats stats, boolean isRG) {
|
||||||
PrintStream output = getCorrectStream(out,locusFile);
|
PrintStream output = getCorrectStream(out,locusFile);
|
||||||
PrintStream coverageOut = getCorrectStream(out,coverageFile);
|
PrintStream coverageOut = getCorrectStream(out,coverageFile);
|
||||||
if ( output == null ) {
|
if ( output == null ) {
|
||||||
|
|
@ -564,10 +583,19 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
// rows - # of samples
|
// rows - # of samples
|
||||||
// columns - depth of coverage
|
// columns - depth of coverage
|
||||||
|
|
||||||
|
boolean printSampleColumnHeader = outputFormat.equals("csv") || outputFormat.equals("table");
|
||||||
|
|
||||||
StringBuilder header = new StringBuilder();
|
StringBuilder header = new StringBuilder();
|
||||||
header.append(String.format("\tgte_0"));
|
if ( printSampleColumnHeader ) {
|
||||||
|
if ( isRG ) {
|
||||||
|
header.append("ReadGroup");
|
||||||
|
} else {
|
||||||
|
header.append("Sample");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
header.append(String.format("%sgte_0",separator));
|
||||||
for ( int d : endpoints ) {
|
for ( int d : endpoints ) {
|
||||||
header.append(String.format("\tgte_%d",d));
|
header.append(String.format("%sgte_%d",separator,d));
|
||||||
}
|
}
|
||||||
header.append(String.format("%n"));
|
header.append(String.format("%n"));
|
||||||
|
|
||||||
|
|
@ -577,7 +605,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
for ( int row = 0; row < samples; row ++ ) {
|
for ( int row = 0; row < samples; row ++ ) {
|
||||||
output.printf("%s_%d","NSamples",row+1);
|
output.printf("%s_%d","NSamples",row+1);
|
||||||
for ( int depthBin = 0; depthBin < baseCoverageCumDist[0].length; depthBin ++ ) {
|
for ( int depthBin = 0; depthBin < baseCoverageCumDist[0].length; depthBin ++ ) {
|
||||||
output.printf("\t%d",baseCoverageCumDist[row][depthBin]);
|
output.printf("%s%d",separator,baseCoverageCumDist[row][depthBin]);
|
||||||
}
|
}
|
||||||
output.printf("%n");
|
output.printf("%n");
|
||||||
}
|
}
|
||||||
|
|
@ -586,7 +614,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
coverageOut.printf("%s",sample);
|
coverageOut.printf("%s",sample);
|
||||||
double[] coverageDistribution = stats.getCoverageProportions(sample);
|
double[] coverageDistribution = stats.getCoverageProportions(sample);
|
||||||
for ( int bin = 0; bin < coverageDistribution.length; bin ++ ) {
|
for ( int bin = 0; bin < coverageDistribution.length; bin ++ ) {
|
||||||
coverageOut.printf("\t%.2f",coverageDistribution[bin]);
|
coverageOut.printf("%s%.2f",separator,coverageDistribution[bin]);
|
||||||
}
|
}
|
||||||
coverageOut.printf("%n");
|
coverageOut.printf("%n");
|
||||||
}
|
}
|
||||||
|
|
@ -610,8 +638,11 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
|
|
||||||
private void printSummary(PrintStream out, File optionalFile, DepthOfCoverageStats stats) {
|
private void printSummary(PrintStream out, File optionalFile, DepthOfCoverageStats stats) {
|
||||||
PrintStream output = getCorrectStream(out,optionalFile);
|
PrintStream output = getCorrectStream(out,optionalFile);
|
||||||
|
if ( ! outputFormat.equals("csv") ) {
|
||||||
output.printf("%s\t%s\t%s\t%s\t%s\t%s%n","sample_id","total","mean","granular_third_quartile","granular_median","granular_first_quartile");
|
output.printf("%s\t%s\t%s\t%s\t%s\t%s%n","sample_id","total","mean","granular_third_quartile","granular_median","granular_first_quartile");
|
||||||
|
} else {
|
||||||
|
output.printf("%s,%s,%s,%s,%s,%s%n","sample_id","total","mean","granular_third_quartile","granular_median","granular_first_quartile");
|
||||||
|
}
|
||||||
|
|
||||||
Map<String,int[]> histograms = stats.getHistograms();
|
Map<String,int[]> histograms = stats.getHistograms();
|
||||||
Map<String,Double> means = stats.getMeans();
|
Map<String,Double> means = stats.getMeans();
|
||||||
|
|
@ -627,10 +658,18 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
median = median == histogram.length-1 ? histogram.length-2 : median;
|
median = median == histogram.length-1 ? histogram.length-2 : median;
|
||||||
q1 = q1 == histogram.length-1 ? histogram.length-2 : q1;
|
q1 = q1 == histogram.length-1 ? histogram.length-2 : q1;
|
||||||
q3 = q3 == histogram.length-1 ? histogram.length-2 : q3;
|
q3 = q3 == histogram.length-1 ? histogram.length-2 : q3;
|
||||||
|
if ( ! outputFormat.equals("csv") ) {
|
||||||
output.printf("%s\t%d\t%.2f\t%d\t%d\t%d%n",s,totals.get(s),means.get(s),leftEnds[q3],leftEnds[median],leftEnds[q1]);
|
output.printf("%s\t%d\t%.2f\t%d\t%d\t%d%n",s,totals.get(s),means.get(s),leftEnds[q3],leftEnds[median],leftEnds[q1]);
|
||||||
|
} else {
|
||||||
|
output.printf("%s,%d,%.2f,%d,%d,%d%n",s,totals.get(s),means.get(s),leftEnds[q3],leftEnds[median],leftEnds[q1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( ! outputFormat.equals("csv") ) {
|
||||||
output.printf("%s\t%d\t%.2f\t%s\t%s\t%s%n","Total",stats.getTotalCoverage(),stats.getTotalMeanCoverage(),"N/A","N/A","N/A");
|
output.printf("%s\t%d\t%.2f\t%s\t%s\t%s%n","Total",stats.getTotalCoverage(),stats.getTotalMeanCoverage(),"N/A","N/A","N/A");
|
||||||
|
} else {
|
||||||
|
output.printf("%s,%d,%.2f,%s,%s,%s%n","Total",stats.getTotalCoverage(),stats.getTotalMeanCoverage(),"N/A","N/A","N/A");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getQuantile(int[] histogram, double prop) {
|
private int getQuantile(int[] histogram, double prop) {
|
||||||
|
|
@ -656,19 +695,18 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<String,int[]>, Covera
|
||||||
StringBuilder perSampleOutput = new StringBuilder();
|
StringBuilder perSampleOutput = new StringBuilder();
|
||||||
int tDepth = 0;
|
int tDepth = 0;
|
||||||
for ( String s : allSamples ) {
|
for ( String s : allSamples ) {
|
||||||
perSampleOutput.append("\t");
|
perSampleOutput.append(separator);
|
||||||
long dp = countsBySample.keySet().contains(s) ? sumArray(countsBySample.get(s)) : 0;
|
long dp = countsBySample.keySet().contains(s) ? sumArray(countsBySample.get(s)) : 0;
|
||||||
perSampleOutput.append(dp);
|
perSampleOutput.append(dp);
|
||||||
if ( printBaseCounts ) {
|
if ( printBaseCounts ) {
|
||||||
perSampleOutput.append("\t");
|
perSampleOutput.append(separator);
|
||||||
perSampleOutput.append(baseCounts(countsBySample.get(s)));
|
perSampleOutput.append(baseCounts(countsBySample.get(s)));
|
||||||
}
|
}
|
||||||
tDepth += dp;
|
tDepth += dp;
|
||||||
}
|
}
|
||||||
// remember -- genome locus was printed in map()
|
// remember -- genome locus was printed in map()
|
||||||
stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
stream.printf("%s%d%s%.2f%s%n",separator,tDepth,separator,( (double) tDepth/ (double) allSamples.size()),
|
||||||
//System.out.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
perSampleOutput);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private long sumArray(int[] array) {
|
private long sumArray(int[] array) {
|
||||||
|
|
|
||||||
|
|
@ -86,9 +86,9 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
|
||||||
for ( GenomeLoc interval : intervalBuffer.keySet() ) {
|
for ( GenomeLoc interval : intervalBuffer.keySet() ) {
|
||||||
for ( rodRefSeq refseq : refseqBuffer ) {
|
for ( rodRefSeq refseq : refseqBuffer ) {
|
||||||
if ( interval.overlapsP(refseq.getLocation()) &&
|
if ( interval.overlapsP(refseq.getLocation()) &&
|
||||||
! intervalBuffer.get(interval).geneNames.contains(refseq.getGeneName()) ) {
|
! intervalBuffer.get(interval).geneNames.contains(refseq.getTranscriptUniqueGeneName()) ) {
|
||||||
// if the interval overlaps the gene transcript; and the gene is not already represented in the interval
|
// if the interval overlaps the gene transcript; and the gene is not already represented in the interval
|
||||||
intervalBuffer.get(interval).update(refseq.getGeneName(),
|
intervalBuffer.get(interval).update(refseq.getTranscriptUniqueGeneName(),
|
||||||
refseq.getExonsInInterval(interval),
|
refseq.getExonsInInterval(interval),
|
||||||
refseq.getExonNumbersInInterval(interval));
|
refseq.getExonNumbersInInterval(interval));
|
||||||
nUpdate++;
|
nUpdate++;
|
||||||
|
|
|
||||||
|
|
@ -50,45 +50,26 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
||||||
File baseOutputFile = this.createTempFile("depthofcoveragenofiltering",".tmp");
|
File baseOutputFile = this.createTempFile("depthofcoveragenofiltering",".tmp");
|
||||||
this.setOutputFileLocation(baseOutputFile);
|
this.setOutputFileLocation(baseOutputFile);
|
||||||
|
|
||||||
String[] intervals = {"1:10,000,000-10,000,800","1:10,250,001-10,250,500","1:10,500,001-10,500,300","1:10,750,001-10,750,400"};
|
String[] intervals = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/fhs_jhs_30_targts.interval_list"};
|
||||||
String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam","/broad/1KG/DCC_merged/freeze5/NA19240.pilot2.454.bam"};
|
String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/FHS_indexed_subset.bam"};
|
||||||
|
|
||||||
String cmd = buildRootCmd(b36,new ArrayList<String>(Arrays.asList(bams)),new ArrayList<String>(Arrays.asList(intervals))) + " -mmq 0 -mbq 0 -dels -baseCounts -both";
|
String cmd = buildRootCmd(hg18,new ArrayList<String>(Arrays.asList(bams)),new ArrayList<String>(Arrays.asList(intervals))) + " -mmq 0 -mbq 0 -dels -baseCounts -both --outputFormat csv";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(cmd,0, new ArrayList<String>());
|
WalkerTestSpec spec = new WalkerTestSpec(cmd,0, new ArrayList<String>());
|
||||||
|
|
||||||
// now add the expected files that get generated
|
// now add the expected files that get generated
|
||||||
spec.addAuxFile("959937a9b0ace520b4b7d9915d708003", baseOutputFile);
|
spec.addAuxFile("f53ddf25c2b71e46381f9c434402d88d", baseOutputFile);
|
||||||
spec.addAuxFile("aff2349d6dc221c08f6c469379aeaedf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
|
spec.addAuxFile("925cc5b49286e0222bce6251d1baafc7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
|
||||||
spec.addAuxFile("6476ed0c54a4307a618aa6d3268b050f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
spec.addAuxFile("d9e812398d778f28ed12d7f3d18628e2", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||||
spec.addAuxFile("faf63982622764299c734299ffeb3c63", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_locus_statistics"));
|
spec.addAuxFile("80577bf378de570f84d91b0ef6004102", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_locus_statistics"));
|
||||||
spec.addAuxFile("65318c1e73d98a59cc6f817cde12d3d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary_statistics"));
|
spec.addAuxFile("3a059ad82d945643dc4e03239c4041f5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary_statistics"));
|
||||||
spec.addAuxFile("ef8c3e2ba3fc0da829e10e2d487c00d2", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
spec.addAuxFile("f3315551081331bc322c53b11412d707", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||||
spec.addAuxFile("223377e07b35e81a394b75b38d8e72ee", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
|
spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
|
||||||
spec.addAuxFile("096f4ed94020327288ea76245ebd6942", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
|
spec.addAuxFile("fd29fe0c14351e934a6fef9df1f38f7b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
|
||||||
spec.addAuxFile("8e391b7b84fc19dff83002b6dd9773d8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_locus_statistics"));
|
spec.addAuxFile("111261f0e8ccf8c456d0b2a9482bc32c", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_locus_statistics"));
|
||||||
spec.addAuxFile("43c160ff9d754744728c142709011993", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
|
spec.addAuxFile("cc7ee5075a932dba486e78824ca34202", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
|
||||||
spec.addAuxFile("a374410efe20609c5c4b87a6da7f4d51", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
|
spec.addAuxFile("e1653480daa2d96f7c584ed4cd20c147", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
|
||||||
|
|
||||||
//execute("testBaseOutputNoFiltering",spec);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testMedianOverRightHandBin() {
|
|
||||||
File base = this.createTempFile("depthofcoveragelowbins",".tmp");
|
|
||||||
this.setOutputFileLocation(base);
|
|
||||||
String[] intervals = {"1:10,000,000-10,000,800","1:10,250,001-10,250,500","1:10,500,001-10,500,300","1:10,750,001-10,750,400"};
|
|
||||||
String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam","/broad/1KG/DCC_merged/freeze5/NA19240.pilot2.454.bam"};
|
|
||||||
|
|
||||||
String cmd = buildRootCmd(b36,new ArrayList<String>(Arrays.asList(bams)),new ArrayList<String>(Arrays.asList(intervals))) +
|
|
||||||
" -mmq 0 -mbq 0 -dels -baseCounts -both --start 1 --stop 14 --nBins 13";
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(cmd,0, new ArrayList<String>());
|
|
||||||
spec.addAuxFile("959937a9b0ace520b4b7d9915d708003", base);
|
|
||||||
spec.addAuxFile("219d643627eedd696bc476aac96376c2", createTempFileFromBase(base.getAbsolutePath()+".read_group_interval_statistics"));
|
|
||||||
spec.addAuxFile("dd0225cf1e0b0bd4289b82fd4939f9fd", createTempFileFromBase(base.getAbsolutePath()+".sample_interval_statistics"));
|
|
||||||
spec.addAuxFile("63575a8a2110507e08d421d44d06b327", createTempFileFromBase(base.getAbsolutePath()+".sample_interval_summary"));
|
|
||||||
|
|
||||||
//execute("testMedianOverRHBin",spec);
|
|
||||||
|
|
||||||
|
execute("testBaseOutputNoFiltering",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
public File createTempFileFromBase(String name) {
|
public File createTempFileFromBase(String name) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue