Moved CoverageStatistics to core. It will (soon) be renamed DepthOfCoverage, so please use CoverageStatistics.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3090 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1e8b3ca6ba
commit
dc802aa26f
|
|
@ -43,11 +43,11 @@ PlotLocusQuantiles <- function(X) {
|
|||
medians = matrix(nrow=1,ncol=ncol(Z))
|
||||
quan90 = matrix(nrow=1,ncol=ncol(Z))
|
||||
for ( cc in 1:ncol(Z) ) {
|
||||
medians[cc] = median(Z[,cc])
|
||||
quan90[cc] = quantile(Z[,cc],0.9)
|
||||
medians[cc] = quantile(Z[,cc],0.75)
|
||||
quan90[cc] = quantile(Z[,cc],1)
|
||||
}
|
||||
|
||||
plot(t(medians),xlab="",xaxt="n",ylab="Proportion of loci with >X coverage",type="b",col="blue")
|
||||
plot(t(medians),xlab="",xaxt="n",ylab="Proportion of loci with >X coverage",type="b",col="blue",yaxp=c(0,1,10))
|
||||
axis(1,labels=FALSE)
|
||||
parseColNames <- function(K) {
|
||||
M = matrix(nrow=1,ncol=length(K))
|
||||
|
|
@ -63,7 +63,7 @@ PlotLocusQuantiles <- function(X) {
|
|||
labels <- parseColNames(colnames(X))
|
||||
text(1:length(labels),par("usr")[3]-0.025,srt=90,adj=1,labels=labels,xpd=TRUE,cex=(0.8/32)*length(labels),lheight=(0.8/32)*length(labels))
|
||||
points(t(quan90),type="b",col="red")
|
||||
legend(x=floor(0.6*length(labels)),y=1,c("50% of samples","90% of samples"),col=c("red","blue"),lty=c(1,1))
|
||||
legend(x=floor(0.6*length(labels)),y=1,c("75% of samples","100% of samples"),col=c("red","blue"),lty=c(1,1))
|
||||
dev.off()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
|
|
@ -14,13 +13,13 @@ import org.broadinstitute.sting.gatk.walkers.By;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.coverage.DepthOfCoverageWalker;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.coverage.CoverageUtils;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.coverage.DepthOfCoverageStats;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
|
@ -127,7 +126,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
out.printf("Per-Locus Depth of Coverage output was omitted");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private HashSet<String> getSamplesFromToolKit( boolean getReadGroupsInstead ) {
|
||||
HashSet<String> partitions = new HashSet<String>(); // since the DOCS object uses a HashMap, this will be in the same order
|
||||
|
||||
|
|
@ -151,8 +150,13 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
}
|
||||
|
||||
public CoverageAggregator reduceInit() {
|
||||
CoverageAggregator.AggregationType agType = useBoth ? CoverageAggregator.AggregationType.BOTH :
|
||||
( useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE ) ;
|
||||
|
||||
CoverageAggregator.AggregationType agType;
|
||||
if ( useBoth ) {
|
||||
agType = CoverageAggregator.AggregationType.BOTH;
|
||||
} else {
|
||||
agType = useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE;
|
||||
}
|
||||
|
||||
CoverageAggregator aggro = new CoverageAggregator(agType,start,stop,nBins);
|
||||
|
||||
|
|
@ -170,7 +174,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
}
|
||||
|
||||
public Map<String,int[]> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
|
||||
if ( ! omitDepthOutput ) {
|
||||
out.printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives)
|
||||
//System.out.printf("\t[log]\t%s",ref.getLocus());
|
||||
|
|
@ -327,7 +331,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
if ( ! getToolkit().getArguments().outFileName.contains("stdout")) {
|
||||
geneSummaryOut.close();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//blatantly stolen from Andrew Kernytsky
|
||||
|
|
@ -450,7 +454,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
///////////////////
|
||||
// OPTIONAL OUTPUTS
|
||||
//////////////////
|
||||
|
||||
|
||||
if ( ! omitSampleSummary ) {
|
||||
logger.info("Printing summary info");
|
||||
if ( ! useReadGroup || useBoth ) {
|
||||
|
|
@ -561,7 +565,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
output = out;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
|
@ -606,7 +610,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
|
||||
return bin;
|
||||
}
|
||||
|
||||
|
||||
private void printDepths(PrintStream stream, Map<String,int[]> countsBySample, Set<String> allSamples) {
|
||||
// get the depths per sample and build up the output string while tabulating total and average coverage
|
||||
// todo -- update me to deal with base counts/indels
|
||||
|
|
@ -625,7 +629,7 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
// remember -- genome locus was printed in map()
|
||||
stream.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
||||
//System.out.printf("\t%d\t%.2f\t%s%n",tDepth,( (double) tDepth/ (double) allSamples.size()), perSampleOutput);
|
||||
|
||||
|
||||
}
|
||||
|
||||
private long sumArray(int[] array) {
|
||||
|
|
@ -658,7 +662,6 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageA
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
class CoverageAggregator {
|
||||
private DepthOfCoverageStats coverageByRead;
|
||||
private DepthOfCoverageStats coverageBySample;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
|
@ -10,7 +10,7 @@ hg18_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_hg18.rod"
|
|||
b36_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_b36.rod"
|
||||
b36_reference = "/broad/1KG/reference/human_b36_both.fasta"
|
||||
hg18_intervals = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list"
|
||||
#hg18_intervals = "/humgen/gsa-hpprojects/FHS/indexed/interval_lists/fhs_jhs_pilot.targets.interval_list"
|
||||
hg18_intervals = "/humgen/gsa-hpprojects/FHS/indexed/interval_lists/fhs_jhs_pilot.targets.interval_list"
|
||||
b36_intervals = ""
|
||||
|
||||
min_base_q = "10"
|
||||
|
|
@ -37,8 +37,8 @@ else:
|
|||
fpref = "human_b36"
|
||||
|
||||
outputFile = projectName+"_bam_files.txt"
|
||||
OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
|
||||
OUTPUT_HEADER_INDIVIDUAL = ["reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
|
||||
OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
|
||||
OUTPUT_HEADER_INDIVIDUAL = ["reference_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"]
|
||||
|
||||
if ( spreadsheetPath.find("/") > -1 ):
|
||||
newSpreadsheet = spreadsheetPath.rsplit("/",1)[1].rsplit(".",1)[0]+"_proper_format.tsv"
|
||||
|
|
@ -81,7 +81,7 @@ for line in project_info.readlines():
|
|||
else:
|
||||
fingerprint_file = ""
|
||||
if ( spline[status_index] == "Complete" ):
|
||||
outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")
|
||||
outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")
|
||||
|
||||
outputFile.close()
|
||||
outputFile = open(projectName+"_Project_Entry.txt",'w')
|
||||
|
|
@ -90,4 +90,4 @@ outputFile.write(projectName)
|
|||
outputFile.close()
|
||||
outputFile = open(projectName+"_Population_Entry.txt",'w')
|
||||
outputFile.write("individual_id\tindividual_set_id\t"+"\t".join(OUTPUT_HEADER_INDIVIDUAL)+"\n")
|
||||
outputFile.write(groupName+"\t"+projectName+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")
|
||||
outputFile.write(groupName+"\t"+projectName+"\t"+reference+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")
|
||||
|
|
|
|||
|
|
@ -4,10 +4,11 @@ import sys
|
|||
import os
|
||||
|
||||
bam_file = sys.argv[1]
|
||||
project = sys.argv[2]
|
||||
fingerprint_file = sys.argv[2]
|
||||
project = sys.argv[3]
|
||||
directory = bam_file.rsplit("/",1)[0]+"/"
|
||||
sample_id = bam_file.rsplit("/",1)[1].rsplit(".",1)[0]
|
||||
is_metrics = directory+sample_id+".insert_size_metrics"
|
||||
his_metrics = directory+sample_id+".hybrid_selection_metrics"
|
||||
ali_metrics = directory+sample_id+".alignment_summary_metrics"
|
||||
os.system("zip -j "+project+"_"+sample_id+"_sequencing_metrics"+" "+is_metrics+" "+his_metrics+" "+ali_metrics)
|
||||
os.system("zip -j "+project+"_"+sample_id+"_sequencing_metrics"+" "+is_metrics+" "+his_metrics+" "+ali_metrics+" "+fingerprint_file)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env Python
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue