Moved CoverageStatistics to core. This class will soon be renamed DepthOfCoverage; until then, please use CoverageStatistics

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3090 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-03-29 13:32:00 +00:00
parent 1e8b3ca6ba
commit dc802aa26f
8 changed files with 33 additions and 29 deletions

View File

@ -43,11 +43,11 @@ PlotLocusQuantiles <- function(X) {
medians = matrix(nrow=1,ncol=ncol(Z))
quan90 = matrix(nrow=1,ncol=ncol(Z))
for ( cc in 1:ncol(Z) ) {
medians[cc] = median(Z[,cc])
quan90[cc] = quantile(Z[,cc],0.9)
medians[cc] = quantile(Z[,cc],0.75)
quan90[cc] = quantile(Z[,cc],1)
}
plot(t(medians),xlab="",xaxt="n",ylab="Proportion of loci with >X coverage",type="b",col="blue")
plot(t(medians),xlab="",xaxt="n",ylab="Proportion of loci with >X coverage",type="b",col="blue",yaxp=c(0,1,10))
axis(1,labels=FALSE)
parseColNames <- function(K) {
M = matrix(nrow=1,ncol=length(K))
@ -63,7 +63,7 @@ PlotLocusQuantiles <- function(X) {
labels <- parseColNames(colnames(X))
text(1:length(labels),par("usr")[3]-0.025,srt=90,adj=1,labels=labels,xpd=TRUE,cex=(0.8/32)*length(labels),lheight=(0.8/32)*length(labels))
points(t(quan90),type="b",col="red")
legend(x=floor(0.6*length(labels)),y=1,c("50% of samples","90% of samples"),col=c("red","blue"),lty=c(1,1))
legend(x=floor(0.6*length(labels)),y=1,c("75% of samples","100% of samples"),col=c("red","blue"),lty=c(1,1))
dev.off()
}

View File

@ -1,9 +1,8 @@
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
package org.broadinstitute.sting.gatk.walkers.coverage;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
@ -14,13 +13,13 @@ import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.coverage.DepthOfCoverageWalker;
import org.broadinstitute.sting.oneoffprojects.walkers.coverage.CoverageUtils;
import org.broadinstitute.sting.oneoffprojects.walkers.coverage.DepthOfCoverageStats;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.io.File;
import java.io.IOException;
@ -127,7 +126,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
out.printf("Per-Locus Depth of Coverage output was omitted");
}
}
private HashSet<String> getSamplesFromToolKit( boolean getReadGroupsInstead ) {
HashSet<String> partitions = new HashSet<String>(); // since the DOCS object uses a HashMap, this will be in the same order
@ -151,8 +150,13 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
}
public CoverageAggregator reduceInit() {
CoverageAggregator.AggregationType agType = useBoth ? CoverageAggregator.AggregationType.BOTH :
( useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE ) ;
CoverageAggregator.AggregationType agType;
if ( useBoth ) {
agType = CoverageAggregator.AggregationType.BOTH;
} else {
agType = useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE;
}
CoverageAggregator aggro = new CoverageAggregator(agType,start,stop,nBins);
@ -170,7 +174,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
}
public Map<String,int[]> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( ! omitDepthOutput ) {
out.printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives)
//System.out.printf("\t[log]\t%s",ref.getLocus());
@ -327,7 +331,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
if ( ! getToolkit().getArguments().outFileName.contains("stdout")) {
geneSummaryOut.close();
}
}
//blatantly stolen from Andrew Kernytsky
@ -450,7 +454,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
///////////////////
// OPTIONAL OUTPUTS
//////////////////
if ( ! omitSampleSummary ) {
logger.info("Printing summary info");
if ( ! useReadGroup || useBoth ) {
@ -561,7 +565,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
output = out;
}
}
return output;
}
@ -606,7 +610,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
return bin;
}
private void printDepths(PrintStream stream, Map<String,int[]> countsBySample, Set<String> allSamples) {
// get the depths per sample and build up the output string while tabulating total and average coverage
// todo -- update me to deal with base counts/indels
@ -625,7 +629,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
// remember -- genome locus was printed in map()
stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
//System.out.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
}
private long sumArray(int[] array) {
@ -658,7 +662,6 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
}
}
class CoverageAggregator {
private DepthOfCoverageStats coverageByRead;
private DepthOfCoverageStats coverageBySample;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.BaseUtils;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broadinstitute.sting.utils.BaseUtils;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;

View File

@ -10,7 +10,7 @@ hg18_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_hg18.rod"
b36_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_b36.rod"
b36_reference = "/broad/1KG/reference/human_b36_both.fasta"
hg18_intervals = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list"
#hg18_intervals = "/humgen/gsa-hpprojects/FHS/indexed/interval_lists/fhs_jhs_pilot.targets.interval_list"
hg18_intervals = "/humgen/gsa-hpprojects/FHS/indexed/interval_lists/fhs_jhs_pilot.targets.interval_list"
b36_intervals = ""
min_base_q = "10"
@ -37,8 +37,8 @@ else:
fpref = "human_b36"
outputFile = projectName+"_bam_files.txt"
OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
OUTPUT_HEADER_INDIVIDUAL = ["reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
OUTPUT_HEADER_INDIVIDUAL = ["reference_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
if ( spreadsheetPath.find("/") > -1 ):
newSpreadsheet = spreadsheetPath.rsplit("/",1)[1].rsplit(".",1)[0]+"_proper_format.tsv"
@ -81,7 +81,7 @@ for line in project_info.readlines():
else:
fingerprint_file = ""
if ( spline[status_index] == "Complete" ):
outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")
outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")
outputFile.close()
outputFile = open(projectName+"_Project_Entry.txt",'w')
@ -90,4 +90,4 @@ outputFile.write(projectName)
outputFile.close()
outputFile = open(projectName+"_Population_Entry.txt",'w')
outputFile.write("individual_id\tindividual_set_id\t"+"\t".join(OUTPUT_HEADER_INDIVIDUAL)+"\n")
outputFile.write(groupName+"\t"+projectName+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")
outputFile.write(groupName+"\t"+projectName+"\t"+reference+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")

View File

@ -4,10 +4,11 @@ import sys
import os
bam_file = sys.argv[1]
project = sys.argv[2]
fingerprint_file = sys.argv[2]
project = sys.argv[3]
directory = bam_file.rsplit("/",1)[0]+"/"
sample_id = bam_file.rsplit("/",1)[1].rsplit(".",1)[0]
is_metrics = directory+sample_id+".insert_size_metrics"
his_metrics = directory+sample_id+".hybrid_selection_metrics"
ali_metrics = directory+sample_id+".alignment_summary_metrics"
os.system("zip -j "+project+"_"+sample_id+"_sequencing_metrics"+" "+is_metrics+" "+his_metrics+" "+ali_metrics)
os.system("zip -j "+project+"_"+sample_id+"_sequencing_metrics"+" "+is_metrics+" "+his_metrics+" "+ali_metrics+" "+fingerprint_file)

View File

@ -1,4 +1,4 @@
#!/usr/bin/env Python
#!/usr/bin/env python
import sys