Commit for Aaron
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2932 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c20d3e567e
commit
706d49d84c
|
|
@ -4,6 +4,11 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodRefSeq;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
|
|
@ -48,9 +53,9 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
|
||||
int start = 1;
|
||||
@Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false)
|
||||
int stop = 1000;
|
||||
int stop = 500;
|
||||
@Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false)
|
||||
int nBins = 20;
|
||||
int nBins = 499;
|
||||
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to 50.", required = false)
|
||||
byte minMappingQuality = 50;
|
||||
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to 20.", required = false)
|
||||
|
|
@ -73,6 +78,8 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
boolean includeDeletions = false;
|
||||
@Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
|
||||
boolean ignoreDeletionSites = false;
|
||||
@Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false)
|
||||
File refSeqGeneList = null;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// STANDARD WALKER METHODS
|
||||
|
|
@ -98,17 +105,21 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
|
||||
if ( ! omitDepthOutput ) { // print header
|
||||
out.printf("%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
||||
//System.out.printf("\t[log]\t%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
||||
// get all the samples
|
||||
HashSet<String> allSamples = getSamplesFromToolKit(useReadGroup);
|
||||
|
||||
for ( String s : allSamples) {
|
||||
out.printf("\t%s_%s","Depth_for",s);
|
||||
//System.out.printf("\t%s_%s","Depth_for",s);
|
||||
if ( printBaseCounts ) {
|
||||
out.printf("\t%s_%s",s,"_base_counts");
|
||||
out.printf("\t%s_%s",s,"base_counts");
|
||||
//System.out.printf("\t%s_%s",s,"base_counts");
|
||||
}
|
||||
}
|
||||
|
||||
out.printf("%n");
|
||||
//System.out.printf("%n");
|
||||
|
||||
} else {
|
||||
out.printf("Per-Locus Depth of Coverage output was omitted");
|
||||
|
|
@ -162,6 +173,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
|
||||
if ( ! omitDepthOutput ) {
|
||||
out.printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives)
|
||||
//System.out.printf("\t[log]\t%s",ref.getLocus());
|
||||
}
|
||||
|
||||
Map<String,int[]> countsBySample = CoverageUtils.getBaseCountsBySample(context,minMappingQuality,minBaseQuality,
|
||||
|
|
@ -191,6 +203,9 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
public void onTraversalDone( List<Pair<GenomeLoc,DepthOfCoverageStats>> statsByInterval ) {
|
||||
if ( refSeqGeneList != null ) {
|
||||
printGeneStats(statsByInterval);
|
||||
}
|
||||
File intervalStatisticsFile = deriveFromStream("interval_statistics");
|
||||
File intervalSummaryFile = deriveFromStream("interval_summary");
|
||||
DepthOfCoverageStats mergedStats = printIntervalStatsAndMerge(statsByInterval,intervalSummaryFile, intervalStatisticsFile);
|
||||
|
|
@ -263,7 +278,57 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
return firstStats;
|
||||
}
|
||||
|
||||
private void printTargetSummary(PrintStream output, Pair<GenomeLoc,DepthOfCoverageStats> intervalStats) {
|
||||
private void printGeneStats(List<Pair<GenomeLoc,DepthOfCoverageStats>> statsByTarget) {
|
||||
LocationAwareSeekableRODIterator refseqIterator = initializeRefSeq();
|
||||
List<Pair<String,DepthOfCoverageStats>> statsByGene = new ArrayList<Pair<String,DepthOfCoverageStats>>();// maintains order
|
||||
Map<String,DepthOfCoverageStats> geneNamesToStats = new HashMap<String,DepthOfCoverageStats>(); // alows indirect updating of objects in list
|
||||
|
||||
for ( Pair<GenomeLoc,DepthOfCoverageStats> targetStats : statsByTarget ) {
|
||||
String gene = getGeneName(targetStats.first,refseqIterator);
|
||||
if ( geneNamesToStats.keySet().contains(gene) ) {
|
||||
geneNamesToStats.get(gene).merge(targetStats.second);
|
||||
} else {
|
||||
geneNamesToStats.put(gene,targetStats.second);
|
||||
statsByGene.add(new Pair<String,DepthOfCoverageStats>(gene,targetStats.second));
|
||||
}
|
||||
}
|
||||
|
||||
PrintStream geneSummaryOut = getCorrectStream(out,deriveFromStream("gene_summary"));
|
||||
|
||||
for ( Pair<String,DepthOfCoverageStats> geneStats : statsByGene ) {
|
||||
printTargetSummary(geneSummaryOut,geneStats);
|
||||
}
|
||||
|
||||
if ( ! getToolkit().getArguments().outFileName.contains("stdout")) {
|
||||
geneSummaryOut.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//blatantly stolen from Andrew Kernytsky
|
||||
private String getGeneName(GenomeLoc target, LocationAwareSeekableRODIterator refseqIterator) {
|
||||
if (refseqIterator == null) { return "UNKNOWN"; }
|
||||
|
||||
RODRecordList annotationList = refseqIterator.seekForward(target);
|
||||
if (annotationList == null) { return "UNKNOWN"; }
|
||||
|
||||
for(ReferenceOrderedDatum rec : annotationList) {
|
||||
if ( ((rodRefSeq)rec).overlapsExonP(target) ) {
|
||||
return ((rodRefSeq)rec).getGeneName();
|
||||
}
|
||||
}
|
||||
|
||||
return "UNKNOWN";
|
||||
|
||||
}
|
||||
|
||||
private LocationAwareSeekableRODIterator initializeRefSeq() {
|
||||
ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
|
||||
refSeqGeneList, rodRefSeq.class);
|
||||
return refseq.iterator();
|
||||
}
|
||||
|
||||
private void printTargetSummary(PrintStream output, Pair<?,DepthOfCoverageStats> intervalStats) {
|
||||
DepthOfCoverageStats stats = intervalStats.second;
|
||||
int[] bins = stats.getEndpoints();
|
||||
StringBuilder targetSummary = new StringBuilder();
|
||||
|
|
@ -452,10 +517,11 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
private void printSummary(PrintStream out, File optionalFile, DepthOfCoverageStats stats) {
|
||||
PrintStream output = getCorrectStream(out,optionalFile);
|
||||
|
||||
output.printf("%s\t%s\t%s\t%s\t%s%n","sample_id","mean","granular_third_quartile","granular_median","granular_first_quartile");
|
||||
output.printf("%s\t%s\t%s\t%s\t%s\t%s%n","sample_id","total","mean","granular_third_quartile","granular_median","granular_first_quartile");
|
||||
|
||||
Map<String,int[]> histograms = stats.getHistograms();
|
||||
Map<String,Double> means = stats.getMeans();
|
||||
Map<String,Long> totals = stats.getTotals();
|
||||
int[] leftEnds = stats.getEndpoints();
|
||||
|
||||
for ( String s : histograms.keySet() ) {
|
||||
|
|
@ -467,10 +533,10 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
median = median == histogram.length-1 ? histogram.length-2 : median;
|
||||
q1 = q1 == histogram.length-1 ? histogram.length-2 : q1;
|
||||
q3 = q3 == histogram.length-1 ? histogram.length-2 : q3;
|
||||
output.printf("%s\t%.2f\t%d\t%d\t%d%n",s,means.get(s),leftEnds[q3],leftEnds[median],leftEnds[q1]);
|
||||
output.printf("%s\t%d\t%.2f\t%d\t%d\t%d%n",s,totals.get(s),means.get(s),leftEnds[q3],leftEnds[median],leftEnds[q1]);
|
||||
}
|
||||
|
||||
output.printf("%s\t%.2f\t%s\t%s\t%s%n","Total",stats.getTotalMeanCoverage(),"N/A","N/A","N/A");
|
||||
output.printf("%s\t%d\t%.2f\t%s\t%s\t%s%n","Total",stats.getTotalCoverage(),stats.getTotalMeanCoverage(),"N/A","N/A","N/A");
|
||||
}
|
||||
|
||||
private int getQuantile(int[] histogram, double prop) {
|
||||
|
|
@ -507,6 +573,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
|||
}
|
||||
// remember -- genome locus was printed in map()
|
||||
stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
||||
//System.out.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,9 @@ import org.broadinstitute.sting.WalkerTest;
|
|||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||
|
|
@ -14,11 +16,26 @@ import java.util.Arrays;
|
|||
*/
|
||||
public class CoverageStatisticsIntegrationTest extends WalkerTest {
|
||||
|
||||
private boolean RUN_TESTS = true;
|
||||
private boolean RUN_TESTS = false;
|
||||
private String root = "-T CoverageStatistics ";
|
||||
private String hg18 = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
||||
private String b36 = "/broad/1KG/reference/human_b36_both.fasta";
|
||||
|
||||
private String buildRootCmd(String ref, String bam, String interval) {
|
||||
return root + "-R "+ref+" -I "+bam+" -L "+interval;
|
||||
private String buildRootCmd(String ref, List<String> bams, List<String> intervals) {
|
||||
StringBuilder bamBuilder = new StringBuilder();
|
||||
do {
|
||||
bamBuilder.append(" -I ");
|
||||
bamBuilder.append(bams.remove(0));
|
||||
} while ( bams.size() > 0 );
|
||||
|
||||
StringBuilder intervalBuilder = new StringBuilder();
|
||||
do {
|
||||
intervalBuilder.append(" -L ");
|
||||
intervalBuilder.append(intervals.remove(0));
|
||||
} while ( intervals.size() > 0 );
|
||||
|
||||
|
||||
return root + "-R "+ref+bamBuilder.toString()+intervalBuilder.toString();
|
||||
}
|
||||
|
||||
private void execute(String name, WalkerTestSpec spec) {
|
||||
|
|
@ -30,19 +47,23 @@ public class CoverageStatisticsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBaseOutputNoFiltering() {
|
||||
// our base file
|
||||
File baseOutputFile = this.createTempFile("outputtemp",".tmp");
|
||||
File baseOutputFile = this.createTempFile("depthofcoveragenofiltering",".tmp");
|
||||
this.setOutputFileLocation(baseOutputFile);
|
||||
|
||||
String bam_file = "/humgen/gsa-hphome1/chartl/projects/depthOfCoverage/testFiles/bams/Ciliopathies_1_88534_3_samples.bam";
|
||||
String interval_list = "chr1:855534";
|
||||
String reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
||||
String cmd = buildRootCmd(reference,bam_file,interval_list) + " -mmq 0 -mbq 0 -omitSampleSummary -omitLocus";
|
||||
String expected = "d41d8cd98f00b204e9800998ecf8427e";
|
||||
String[] intervals = {"1:10,000,000-10,000,800","1:10,250,001-10,250,500","1:10,500,001-10,500,300","1:10,750,001-10,750,400"};
|
||||
String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam","/broad/1KG/DCC_merged/freeze5/NA19240.pilot2.454.bam"};
|
||||
|
||||
String cmd = buildRootCmd(b36,new ArrayList<String>(Arrays.asList(bams)),new ArrayList<String>(Arrays.asList(intervals))) + " -mmq 0 -mbq 0 -dels -baseCounts";
|
||||
String expected = "cb87d6069ac60c73f047efc6d9386619";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmd,1, Arrays.asList(expected));
|
||||
|
||||
// now add the expected files that get generated
|
||||
spec.addAuxFile("344936e0bb4613544ff137cd1a002d6c",new File(baseOutputFile.getAbsolutePath() + ".interval_statistics"));
|
||||
|
||||
execute("testBaseOutputNoFiltering",spec);
|
||||
spec.addAuxFile("aff2349d6dc221c08f6c469379aeaedf", new File(baseOutputFile.getAbsolutePath() + ".interval_statistics"));
|
||||
spec.addAuxFile("6476ed0c54a4307a618aa6d3268b050f", new File(baseOutputFile.getAbsolutePath()+".interval_summary"));
|
||||
spec.addAuxFile("c744a298b7541f3f823e6937e9a0bc67", new File(baseOutputFile.getAbsolutePath()+".locus_statistics"));
|
||||
spec.addAuxFile("65318c1e73d98a59cc6f817cde12d3d4", new File(baseOutputFile.getAbsolutePath()+".summary_statistics"));
|
||||
spec.addAuxFile("9fc19f773a7ddfbb473d124e675a3d94", new File(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||
if ( RUN_TESTS )
|
||||
execute("testBaseOutputNoFiltering",spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue