From dc802aa26f329c335fff1a359cff083e5e214327 Mon Sep 17 00:00:00 2001 From: chartl Date: Mon, 29 Mar 2010 13:32:00 +0000 Subject: [PATCH] Moved CoverageStatistics to core. This will be (soon) renamed DepthOfCoverage; so please use CoverageStatistics git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3090 348d0f76-0448-11de-a6fe-93d51630548a --- R/PlotDepthOfCoverage.R | 8 ++--- .../walkers/coverage/CoverageStatistics.java | 31 ++++++++++--------- .../walkers/coverage/CoverageUtils.java | 2 +- .../coverage/DepthOfCoverageStats.java | 2 +- .../CoverageStatisticsIntegrationTest.java | 2 +- python/getBamFilesFromSpreadsheet.py | 10 +++--- python/makeMetricsFilesForFirehose.py | 5 +-- python/mergeVCFInfoFields.py | 2 +- 8 files changed, 33 insertions(+), 29 deletions(-) rename java/src/org/broadinstitute/sting/{oneoffprojects => gatk}/walkers/coverage/CoverageStatistics.java (98%) rename java/src/org/broadinstitute/sting/{oneoffprojects => gatk}/walkers/coverage/CoverageUtils.java (96%) rename java/src/org/broadinstitute/sting/{oneoffprojects => gatk}/walkers/coverage/DepthOfCoverageStats.java (99%) rename java/test/org/broadinstitute/sting/{oneoffprojects/walkers/coverage => gatk/walkers}/CoverageStatisticsIntegrationTest.java (98%) diff --git a/R/PlotDepthOfCoverage.R b/R/PlotDepthOfCoverage.R index 5bb7504c5..c4e2b271b 100644 --- a/R/PlotDepthOfCoverage.R +++ b/R/PlotDepthOfCoverage.R @@ -43,11 +43,11 @@ PlotLocusQuantiles <- function(X) { medians = matrix(nrow=1,ncol=ncol(Z)) quan90 = matrix(nrow=1,ncol=ncol(Z)) for ( cc in 1:ncol(Z) ) { - medians[cc] = median(Z[,cc]) - quan90[cc] = quantile(Z[,cc],0.9) + medians[cc] = quantile(Z[,cc],0.75) + quan90[cc] = quantile(Z[,cc],1) } - plot(t(medians),xlab="",xaxt="n",ylab="Proportion of loci with >X coverage",type="b",col="blue") + plot(t(medians),xlab="",xaxt="n",ylab="Proportion of loci with >X coverage",type="b",col="blue",yaxp=c(0,1,10)) axis(1,labels=FALSE) parseColNames <- function(K) { M = matrix(nrow=1,ncol=length(K)) @@ -63,7 +63,7 @@ PlotLocusQuantiles <- function(X) { labels <- parseColNames(colnames(X)) text(1:length(labels),par("usr")[3]-0.025,srt=90,adj=1,labels=labels,xpd=TRUE,cex=(0.8/32)*length(labels),lheight=(0.8/32)*length(labels)) points(t(quan90),type="b",col="red") - legend(x=floor(0.6*length(labels)),y=1,c("50% of samples","90% of samples"),col=c("red","blue"),lty=c(1,1)) + legend(x=floor(0.6*length(labels)),y=1,c("75% of samples","100% of samples"),col=c("red","blue"),lty=c(1,1)) dev.off() } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageStatistics.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageStatistics.java similarity index 98% rename from java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageStatistics.java rename to java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageStatistics.java index f089dda56..105567540 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageStatistics.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageStatistics.java @@ -1,9 +1,8 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.coverage; +package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; @@ -14,13 +13,13 @@ import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.gatk.walkers.coverage.DepthOfCoverageWalker; +import org.broadinstitute.sting.oneoffprojects.walkers.coverage.CoverageUtils; +import org.broadinstitute.sting.oneoffprojects.walkers.coverage.DepthOfCoverageStats; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.pileup.PileupElement; import java.io.File; import java.io.IOException; @@ -127,7 +126,7 @@ public class CoverageStatistics extends LocusWalker, CoverageA out.printf("Per-Locus Depth of Coverage output was omitted"); } } - + private HashSet getSamplesFromToolKit( boolean getReadGroupsInstead ) { HashSet partitions = new HashSet(); // since the DOCS object uses a HashMap, this will be in the same order @@ -151,8 +150,13 @@ public class CoverageStatistics extends LocusWalker, CoverageA } public CoverageAggregator reduceInit() { - CoverageAggregator.AggregationType agType = useBoth ? CoverageAggregator.AggregationType.BOTH : - ( useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE ) ; + + CoverageAggregator.AggregationType agType; + if ( useBoth ) { + agType = CoverageAggregator.AggregationType.BOTH; + } else { + agType = useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE; + } CoverageAggregator aggro = new CoverageAggregator(agType,start,stop,nBins); @@ -170,7 +174,7 @@ public class CoverageStatistics extends LocusWalker, CoverageA } public Map map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - + if ( ! omitDepthOutput ) { out.printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives) //System.out.printf("\t[log]\t%s",ref.getLocus()); @@ -327,7 +331,7 @@ public class CoverageStatistics extends LocusWalker, CoverageA if ( ! getToolkit().getArguments().outFileName.contains("stdout")) { geneSummaryOut.close(); } - + } //blatantly stolen from Andrew Kernytsky @@ -450,7 +454,7 @@ public class CoverageStatistics extends LocusWalker, CoverageA /////////////////// // OPTIONAL OUTPUTS ////////////////// - + if ( ! omitSampleSummary ) { logger.info("Printing summary info"); if ( ! useReadGroup || useBoth ) { @@ -561,7 +565,7 @@ public class CoverageStatistics extends LocusWalker, CoverageA output = out; } } - + return output; } @@ -606,7 +610,7 @@ public class CoverageStatistics extends LocusWalker, CoverageA return bin; } - + private void printDepths(PrintStream stream, Map countsBySample, Set allSamples) { // get the depths per sample and build up the output string while tabulating total and average coverage // todo -- update me to deal with base counts/indels @@ -625,7 +629,7 @@ public class CoverageStatistics extends LocusWalker, CoverageA // remember -- genome locus was printed in map() stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput); //System.out.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput); - + } private long sumArray(int[] array) { @@ -658,7 +662,6 @@ public class CoverageStatistics extends LocusWalker, CoverageA } } - class CoverageAggregator { private DepthOfCoverageStats coverageByRead; private DepthOfCoverageStats coverageBySample; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageUtils.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java similarity index 96% rename from java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageUtils.java rename to java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index 893e64b8e..b3c360222 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.coverage; +package org.broadinstitute.sting.gatk.walkers.coverage; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.BaseUtils; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/DepthOfCoverageStats.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java similarity index 99% rename from java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/DepthOfCoverageStats.java rename to java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java index 60add05f1..368015a60 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/coverage/DepthOfCoverageStats.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.coverage; +package org.broadinstitute.sting.gatk.walkers.coverage; import org.broadinstitute.sting.utils.BaseUtils; diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageStatisticsIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/CoverageStatisticsIntegrationTest.java similarity index 98% rename from java/test/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageStatisticsIntegrationTest.java rename to java/test/org/broadinstitute/sting/gatk/walkers/CoverageStatisticsIntegrationTest.java index 8b67bb82b..1b8ef71ea 100644 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/coverage/CoverageStatisticsIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/CoverageStatisticsIntegrationTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.coverage; +package org.broadinstitute.sting.gatk.walkers; import org.broadinstitute.sting.WalkerTest; import org.junit.Test; diff --git a/python/getBamFilesFromSpreadsheet.py b/python/getBamFilesFromSpreadsheet.py index 8ae95b2df..bb6980780 100755 --- a/python/getBamFilesFromSpreadsheet.py +++ b/python/getBamFilesFromSpreadsheet.py @@ -10,7 +10,7 @@ hg18_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_hg18.rod" b36_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_b36.rod" b36_reference = "/broad/1KG/reference/human_b36_both.fasta" hg18_intervals = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list" -#hg18_intervals = "/humgen/gsa-hpprojects/FHS/indexed/interval_lists/fhs_jhs_pilot.targets.interval_list" +hg18_intervals = "/humgen/gsa-hpprojects/FHS/indexed/interval_lists/fhs_jhs_pilot.targets.interval_list" b36_intervals = "" min_base_q = "10" @@ -37,8 +37,8 @@ else: fpref = "human_b36" outputFile = projectName+"_bam_files.txt" -OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"] -OUTPUT_HEADER_INDIVIDUAL = ["reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"] +OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"] +OUTPUT_HEADER_INDIVIDUAL = ["reference_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"] if ( spreadsheetPath.find("/") > -1 ): newSpreadsheet = spreadsheetPath.rsplit("/",1)[1].rsplit(".",1)[0]+"_proper_format.tsv" @@ -81,7 +81,7 @@ for line in project_info.readlines(): else: fingerprint_file = "" if ( spline[status_index] == "Complete" ): - outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n") + outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n") outputFile.close() outputFile = open(projectName+"_Project_Entry.txt",'w') @@ -90,4 +90,4 @@ outputFile.write(projectName) outputFile.close() outputFile = open(projectName+"_Population_Entry.txt",'w') outputFile.write("individual_id\tindividual_set_id\t"+"\t".join(OUTPUT_HEADER_INDIVIDUAL)+"\n") -outputFile.write(groupName+"\t"+projectName+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n") +outputFile.write(groupName+"\t"+projectName+"\t"+reference+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n") diff --git a/python/makeMetricsFilesForFirehose.py b/python/makeMetricsFilesForFirehose.py index e13d8d1fc..623c9d042 100755 --- a/python/makeMetricsFilesForFirehose.py +++ b/python/makeMetricsFilesForFirehose.py @@ -4,10 +4,11 @@ import sys import os bam_file = sys.argv[1] -project = sys.argv[2] +fingerprint_file = sys.argv[2] +project = sys.argv[3] directory = bam_file.rsplit("/",1)[0]+"/" sample_id = bam_file.rsplit("/",1)[1].rsplit(".",1)[0] is_metrics = directory+sample_id+".insert_size_metrics" his_metrics = directory+sample_id+".hybrid_selection_metrics" ali_metrics = directory+sample_id+".alignment_summary_metrics" -os.system("zip -j "+project+"_"+sample_id+"_sequencing_metrics"+" "+is_metrics+" "+his_metrics+" "+ali_metrics) +os.system("zip -j "+project+"_"+sample_id+"_sequencing_metrics"+" "+is_metrics+" "+his_metrics+" "+ali_metrics+" "+fingerprint_file) diff --git a/python/mergeVCFInfoFields.py b/python/mergeVCFInfoFields.py index c3829d935..3c970f3eb 100644 --- a/python/mergeVCFInfoFields.py +++ b/python/mergeVCFInfoFields.py @@ -1,4 +1,4 @@ -#!/usr/bin/env Python +#!/usr/bin/env python import sys