Minor refactoring of CoverageStatistics to allow simultaneous output of per-sample and per-read group statistics.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2940 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
95d560aa2f
commit
8738c544f1
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
package org.broadinstitute.sting.oneoffprojects.walkers.coverage;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
|
|
@ -49,7 +50,7 @@ import java.util.*;
|
||||||
// todo -- allow for user to set linear binning (default is logarithmic)
|
// todo -- allow for user to set linear binning (default is logarithmic)
|
||||||
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCoverageStats> implements TreeReducible<DepthOfCoverageStats> {
|
public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageAggregator> implements TreeReducible<CoverageAggregator> {
|
||||||
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
|
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
|
||||||
int start = 1;
|
int start = 1;
|
||||||
@Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false)
|
@Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false)
|
||||||
|
|
@ -74,6 +75,8 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
boolean omitSampleSummary = false;
|
boolean omitSampleSummary = false;
|
||||||
@Argument(fullName = "useReadGroups", shortName = "rg", doc = "Split depth of coverage output by read group rather than by sample", required = false)
|
@Argument(fullName = "useReadGroups", shortName = "rg", doc = "Split depth of coverage output by read group rather than by sample", required = false)
|
||||||
boolean useReadGroup = false;
|
boolean useReadGroup = false;
|
||||||
|
@Argument(fullName = "useBothSampleAndReadGroup", shortName = "both", doc = "Split output by both read group and by sample", required = false)
|
||||||
|
boolean useBoth = false;
|
||||||
@Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false)
|
@Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false)
|
||||||
boolean includeDeletions = false;
|
boolean includeDeletions = false;
|
||||||
@Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
|
@Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
|
||||||
|
|
@ -105,21 +108,20 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
|
|
||||||
if ( ! omitDepthOutput ) { // print header
|
if ( ! omitDepthOutput ) { // print header
|
||||||
out.printf("%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
out.printf("%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
||||||
//System.out.printf("\t[log]\t%s\t%s\t%s","Locus","Total_Depth","Average_Depth");
|
|
||||||
// get all the samples
|
// get all the samples
|
||||||
HashSet<String> allSamples = getSamplesFromToolKit(useReadGroup);
|
HashSet<String> allSamples = getSamplesFromToolKit(useReadGroup);
|
||||||
|
if ( useBoth ) {
|
||||||
|
allSamples.addAll(getSamplesFromToolKit(!useReadGroup));
|
||||||
|
}
|
||||||
|
|
||||||
for ( String s : allSamples) {
|
for ( String s : allSamples) {
|
||||||
out.printf("\t%s_%s","Depth_for",s);
|
out.printf("\t%s_%s","Depth_for",s);
|
||||||
//System.out.printf("\t%s_%s","Depth_for",s);
|
|
||||||
if ( printBaseCounts ) {
|
if ( printBaseCounts ) {
|
||||||
out.printf("\t%s_%s",s,"base_counts");
|
out.printf("\t%s_%s",s,"base_counts");
|
||||||
//System.out.printf("\t%s_%s",s,"base_counts");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
out.printf("%n");
|
out.printf("%n");
|
||||||
//System.out.printf("%n");
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
out.printf("Per-Locus Depth of Coverage output was omitted");
|
out.printf("Per-Locus Depth of Coverage output was omitted");
|
||||||
|
|
@ -130,10 +132,8 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
HashSet<String> partitions = new HashSet<String>(); // since the DOCS object uses a HashMap, this will be in the same order
|
HashSet<String> partitions = new HashSet<String>(); // since the DOCS object uses a HashMap, this will be in the same order
|
||||||
|
|
||||||
if ( getReadGroupsInstead ) {
|
if ( getReadGroupsInstead ) {
|
||||||
for ( Set<String> rgSet : getToolkit().getMergedReadGroupsByReaders() ) {
|
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||||
for ( String rg : rgSet ) {
|
partitions.add(rg.getSample()+"_rg_"+rg.getReadGroupId());
|
||||||
partitions.add(rg);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders()) {
|
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders()) {
|
||||||
|
|
@ -150,23 +150,23 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
return ( ! omitIntervals );
|
return ( ! omitIntervals );
|
||||||
}
|
}
|
||||||
|
|
||||||
public DepthOfCoverageStats reduceInit() {
|
public CoverageAggregator reduceInit() {
|
||||||
DepthOfCoverageStats stats = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins));
|
CoverageAggregator.AggregationType agType = useBoth ? CoverageAggregator.AggregationType.BOTH :
|
||||||
|
( useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE) ;
|
||||||
|
|
||||||
for ( String sample : getSamplesFromToolKit(useReadGroup) ) {
|
CoverageAggregator aggro = new CoverageAggregator(agType,start,stop,nBins);
|
||||||
stats.addSample(sample);
|
|
||||||
|
if ( ! useReadGroup || useBoth ) {
|
||||||
|
aggro.addSamples(getSamplesFromToolKit(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( useReadGroup || useBoth ) {
|
||||||
if ( ! omitLocusTable ) {
|
aggro.addReadGroups(getSamplesFromToolKit(true));
|
||||||
stats.initializeLocusCounts();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( includeDeletions ) {
|
aggro.initialize(includeDeletions,omitLocusTable);
|
||||||
stats.initializeDeletions();
|
|
||||||
}
|
|
||||||
|
|
||||||
return stats;
|
return aggro;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String,int[]> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Map<String,int[]> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
@ -182,18 +182,19 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
return countsBySample;
|
return countsBySample;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DepthOfCoverageStats reduce(Map<String,int[]> thisMap, DepthOfCoverageStats prevReduce) {
|
public CoverageAggregator reduce(Map<String,int[]> thisMap, CoverageAggregator prevReduce) {
|
||||||
prevReduce.update(thisMap);
|
|
||||||
if ( ! omitDepthOutput ) {
|
if ( ! omitDepthOutput ) {
|
||||||
printDepths(out,thisMap, prevReduce.getAllSamples());
|
printDepths(out,thisMap, prevReduce.getAllSamples());
|
||||||
// this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without
|
// this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without
|
||||||
// turning on omit
|
// turning on omit
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prevReduce.update(thisMap); // note that in "useBoth" cases, this method alters the thisMap object
|
||||||
|
|
||||||
return prevReduce;
|
return prevReduce;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DepthOfCoverageStats treeReduce(DepthOfCoverageStats left, DepthOfCoverageStats right) {
|
public CoverageAggregator treeReduce(CoverageAggregator left, CoverageAggregator right) {
|
||||||
left.merge(right);
|
left.merge(right);
|
||||||
return left;
|
return left;
|
||||||
}
|
}
|
||||||
|
|
@ -202,17 +203,37 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
// INTERVAL ON TRAVERSAL DONE
|
// INTERVAL ON TRAVERSAL DONE
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
public void onTraversalDone( List<Pair<GenomeLoc,DepthOfCoverageStats>> statsByInterval ) {
|
public void onTraversalDone( List<Pair<GenomeLoc,CoverageAggregator>> statsByInterval ) {
|
||||||
if ( refSeqGeneList != null ) {
|
if ( refSeqGeneList != null && ( useBoth || ! useReadGroup ) ) {
|
||||||
printGeneStats(statsByInterval);
|
printGeneStats(statsByInterval);
|
||||||
}
|
}
|
||||||
File intervalStatisticsFile = deriveFromStream("interval_statistics");
|
|
||||||
File intervalSummaryFile = deriveFromStream("interval_summary");
|
if ( useBoth || ! useReadGroup ) {
|
||||||
DepthOfCoverageStats mergedStats = printIntervalStatsAndMerge(statsByInterval,intervalSummaryFile, intervalStatisticsFile);
|
File intervalStatisticsFile = deriveFromStream("sample_interval_statistics");
|
||||||
this.onTraversalDone(mergedStats);
|
File intervalSummaryFile = deriveFromStream("sample_interval_summary");
|
||||||
|
printIntervalStats(statsByInterval,intervalSummaryFile, intervalStatisticsFile, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DepthOfCoverageStats printIntervalStatsAndMerge(List<Pair<GenomeLoc,DepthOfCoverageStats>> statsByInterval, File summaryFile, File statsFile) {
|
if ( useBoth || useReadGroup ) {
|
||||||
|
File intervalStatisticsFile = deriveFromStream("read_group_interval_statistics");
|
||||||
|
File intervalSummaryFile = deriveFromStream("read_group_interval_summary");
|
||||||
|
printIntervalStats(statsByInterval,intervalSummaryFile, intervalStatisticsFile, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
onTraversalDone(mergeAll(statsByInterval));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public CoverageAggregator mergeAll(List<Pair<GenomeLoc,CoverageAggregator>> stats) {
|
||||||
|
CoverageAggregator first = stats.remove(0).second;
|
||||||
|
for ( Pair<GenomeLoc,CoverageAggregator> iStat : stats ) {
|
||||||
|
treeReduce(first,iStat.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
return first;
|
||||||
|
}
|
||||||
|
|
||||||
|
private DepthOfCoverageStats printIntervalStats(List<Pair<GenomeLoc,CoverageAggregator>> statsByInterval, File summaryFile, File statsFile, boolean isSample) {
|
||||||
PrintStream summaryOut;
|
PrintStream summaryOut;
|
||||||
PrintStream statsOut;
|
PrintStream statsOut;
|
||||||
|
|
||||||
|
|
@ -224,8 +245,9 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Pair<GenomeLoc,DepthOfCoverageStats> firstPair = statsByInterval.remove(0);
|
Pair<GenomeLoc,CoverageAggregator> firstPair = statsByInterval.get(0);
|
||||||
DepthOfCoverageStats firstStats = firstPair.second;
|
CoverageAggregator firstAggregator = firstPair.second;
|
||||||
|
DepthOfCoverageStats firstStats = isSample ? firstAggregator.getCoverageBySample() : firstAggregator.getCoverageByReadGroup();
|
||||||
|
|
||||||
StringBuilder summaryHeader = new StringBuilder();
|
StringBuilder summaryHeader = new StringBuilder();
|
||||||
summaryHeader.append("Target");
|
summaryHeader.append("Target");
|
||||||
|
|
@ -253,19 +275,14 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
summaryOut.printf("%s%n",summaryHeader);
|
summaryOut.printf("%s%n",summaryHeader);
|
||||||
|
|
||||||
int[][] nTargetsByAvgCvgBySample = new int[firstStats.getHistograms().size()][firstStats.getEndpoints().length+1];
|
int[][] nTargetsByAvgCvgBySample = new int[firstStats.getHistograms().size()][firstStats.getEndpoints().length+1];
|
||||||
for ( int i = 0; i < nTargetsByAvgCvgBySample.length; i ++ ) {
|
|
||||||
for ( int b = 0; b < nTargetsByAvgCvgBySample[0].length; b++) {
|
|
||||||
nTargetsByAvgCvgBySample[i][b] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
printTargetSummary(summaryOut,firstPair);
|
for ( Pair<GenomeLoc,CoverageAggregator> targetAggregator : statsByInterval ) {
|
||||||
updateTargetTable(nTargetsByAvgCvgBySample,firstStats);
|
|
||||||
|
|
||||||
for ( Pair<GenomeLoc,DepthOfCoverageStats> targetStats : statsByInterval ) {
|
Pair<GenomeLoc,DepthOfCoverageStats> targetStats = new Pair<GenomeLoc,DepthOfCoverageStats>(
|
||||||
|
targetAggregator.first, isSample ? targetAggregator.second.getCoverageBySample() :
|
||||||
|
targetAggregator.second.getCoverageByReadGroup());
|
||||||
printTargetSummary(summaryOut,targetStats);
|
printTargetSummary(summaryOut,targetStats);
|
||||||
updateTargetTable(nTargetsByAvgCvgBySample,targetStats.second);
|
updateTargetTable(nTargetsByAvgCvgBySample,targetStats.second);
|
||||||
firstStats = this.treeReduce(firstStats,targetStats.second);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printIntervalTable(statsOut,nTargetsByAvgCvgBySample,firstStats.getEndpoints());
|
printIntervalTable(statsOut,nTargetsByAvgCvgBySample,firstStats.getEndpoints());
|
||||||
|
|
@ -278,18 +295,18 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
return firstStats;
|
return firstStats;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void printGeneStats(List<Pair<GenomeLoc,DepthOfCoverageStats>> statsByTarget) {
|
private void printGeneStats(List<Pair<GenomeLoc,CoverageAggregator>> statsByTarget) {
|
||||||
LocationAwareSeekableRODIterator refseqIterator = initializeRefSeq();
|
LocationAwareSeekableRODIterator refseqIterator = initializeRefSeq();
|
||||||
List<Pair<String,DepthOfCoverageStats>> statsByGene = new ArrayList<Pair<String,DepthOfCoverageStats>>();// maintains order
|
List<Pair<String,DepthOfCoverageStats>> statsByGene = new ArrayList<Pair<String,DepthOfCoverageStats>>();// maintains order
|
||||||
Map<String,DepthOfCoverageStats> geneNamesToStats = new HashMap<String,DepthOfCoverageStats>(); // alows indirect updating of objects in list
|
Map<String,DepthOfCoverageStats> geneNamesToStats = new HashMap<String,DepthOfCoverageStats>(); // alows indirect updating of objects in list
|
||||||
|
|
||||||
for ( Pair<GenomeLoc,DepthOfCoverageStats> targetStats : statsByTarget ) {
|
for ( Pair<GenomeLoc,CoverageAggregator> targetStats : statsByTarget ) {
|
||||||
String gene = getGeneName(targetStats.first,refseqIterator);
|
String gene = getGeneName(targetStats.first,refseqIterator);
|
||||||
if ( geneNamesToStats.keySet().contains(gene) ) {
|
if ( geneNamesToStats.keySet().contains(gene) ) {
|
||||||
geneNamesToStats.get(gene).merge(targetStats.second);
|
geneNamesToStats.get(gene).merge(targetStats.second.getCoverageBySample());
|
||||||
} else {
|
} else {
|
||||||
geneNamesToStats.put(gene,targetStats.second);
|
geneNamesToStats.put(gene,targetStats.second.getCoverageBySample());
|
||||||
statsByGene.add(new Pair<String,DepthOfCoverageStats>(gene,targetStats.second));
|
statsByGene.add(new Pair<String,DepthOfCoverageStats>(gene,targetStats.second.getCoverageBySample()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -411,23 +428,39 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
// FINAL ON TRAVERSAL DONE
|
// FINAL ON TRAVERSAL DONE
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
public void onTraversalDone(DepthOfCoverageStats coverageProfiles) {
|
public void onTraversalDone(CoverageAggregator coverageProfiles) {
|
||||||
///////////////////
|
///////////////////
|
||||||
// OPTIONAL OUTPUTS
|
// OPTIONAL OUTPUTS
|
||||||
//////////////////
|
//////////////////
|
||||||
|
|
||||||
if ( ! omitSampleSummary ) {
|
if ( ! omitSampleSummary ) {
|
||||||
logger.info("Printing sample summary");
|
logger.info("Printing summary info");
|
||||||
File summaryStatisticsFile = deriveFromStream("summary_statistics");
|
if ( ! useReadGroup || useBoth ) {
|
||||||
|
File summaryStatisticsFile = deriveFromStream("sample_summary_statistics");
|
||||||
File perSampleStatisticsFile = deriveFromStream("sample_statistics");
|
File perSampleStatisticsFile = deriveFromStream("sample_statistics");
|
||||||
printSummary(out,summaryStatisticsFile,coverageProfiles);
|
printSummary(out,summaryStatisticsFile,coverageProfiles.getCoverageBySample());
|
||||||
printPerSample(out,perSampleStatisticsFile,coverageProfiles);
|
printPerSample(out,perSampleStatisticsFile,coverageProfiles.getCoverageBySample());
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( useReadGroup || useBoth ) {
|
||||||
|
File rgStatsFile = deriveFromStream("read_group_summary");
|
||||||
|
File rgSumFile = deriveFromStream("read_group_statistics");
|
||||||
|
printSummary(out,rgStatsFile,coverageProfiles.getCoverageByReadGroup());
|
||||||
|
printPerSample(out,rgSumFile,coverageProfiles.getCoverageByReadGroup());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! omitLocusTable ) {
|
if ( ! omitLocusTable ) {
|
||||||
logger.info("Printing locus summary");
|
logger.info("Printing locus summary");
|
||||||
File perLocusStatisticsFile = deriveFromStream("locus_statistics");
|
if ( ! useReadGroup || useBoth ) {
|
||||||
printPerLocus(perLocusStatisticsFile,coverageProfiles);
|
File perLocusStatisticsFile = deriveFromStream("sample_locus_statistics");
|
||||||
|
printPerLocus(perLocusStatisticsFile,coverageProfiles.getCoverageBySample());
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( useReadGroup || useBoth ) {
|
||||||
|
File perLocusRGStats = deriveFromStream("read_group_locus_statistics");
|
||||||
|
printPerLocus(perLocusRGStats,coverageProfiles.getCoverageByReadGroup());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -606,3 +639,111 @@ public class CoverageStatistics extends LocusWalker<Map<String,int[]>, DepthOfCo
|
||||||
return s.toString();
|
return s.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CoverageAggregator {
|
||||||
|
private DepthOfCoverageStats coverageByRead;
|
||||||
|
private DepthOfCoverageStats coverageBySample;
|
||||||
|
|
||||||
|
enum AggregationType { READ, SAMPLE, BOTH }
|
||||||
|
|
||||||
|
private AggregationType agType;
|
||||||
|
private Set<String> sampleNames;
|
||||||
|
private Set<String> allSamples;
|
||||||
|
|
||||||
|
public CoverageAggregator(AggregationType type, int start, int stop, int nBins) {
|
||||||
|
if ( type == AggregationType.READ || type == AggregationType.BOTH) {
|
||||||
|
coverageByRead = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( type == AggregationType.SAMPLE || type == AggregationType.BOTH) {
|
||||||
|
coverageBySample = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins));
|
||||||
|
}
|
||||||
|
|
||||||
|
agType = type;
|
||||||
|
allSamples = new HashSet<String>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void merge(CoverageAggregator otherAggregator) {
|
||||||
|
if ( agType == AggregationType.SAMPLE || agType == AggregationType.BOTH ) {
|
||||||
|
this.coverageBySample.merge(otherAggregator.coverageBySample);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( agType == AggregationType.READ || agType == AggregationType.BOTH) {
|
||||||
|
this.coverageByRead.merge(otherAggregator.coverageByRead);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public DepthOfCoverageStats getCoverageByReadGroup() {
|
||||||
|
return coverageByRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DepthOfCoverageStats getCoverageBySample() {
|
||||||
|
return coverageBySample;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addSamples(Set<String> samples) {
|
||||||
|
for ( String s : samples ) {
|
||||||
|
coverageBySample.addSample(s);
|
||||||
|
allSamples.add(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( agType == AggregationType.BOTH ) {
|
||||||
|
sampleNames = samples;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addReadGroups(Set<String> readGroupNames) {
|
||||||
|
for ( String s : readGroupNames ) {
|
||||||
|
coverageByRead.addSample(s);
|
||||||
|
allSamples.add(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize(boolean useDels, boolean omitLocusTable) {
|
||||||
|
if ( agType == AggregationType.SAMPLE || agType == AggregationType.BOTH ) {
|
||||||
|
if ( useDels ) {
|
||||||
|
coverageBySample.initializeDeletions();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! omitLocusTable ) {
|
||||||
|
coverageBySample.initializeLocusCounts();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( agType == AggregationType.READ || agType == AggregationType.BOTH) {
|
||||||
|
if ( useDels ) {
|
||||||
|
coverageByRead.initializeDeletions();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! omitLocusTable ) {
|
||||||
|
coverageByRead.initializeLocusCounts();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update(Map<String,int[]> countsByIdentifier) {
|
||||||
|
if ( agType != AggregationType.BOTH ) {
|
||||||
|
if ( agType == AggregationType.SAMPLE ) {
|
||||||
|
coverageBySample.update(countsByIdentifier);
|
||||||
|
} else {
|
||||||
|
coverageByRead.update(countsByIdentifier);
|
||||||
|
}
|
||||||
|
} else { // have to separate samples and read groups
|
||||||
|
HashMap<String,int[]> countsBySample = new HashMap<String,int[]>(sampleNames.size());
|
||||||
|
for ( String s : countsByIdentifier.keySet() ) {
|
||||||
|
if ( sampleNames.contains(s) ) {
|
||||||
|
countsBySample.put(s,countsByIdentifier.remove(s));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
coverageBySample.update(countsBySample);
|
||||||
|
coverageByRead.update(countsByIdentifier);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<String> getAllSamples() {
|
||||||
|
return allSamples;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -24,7 +24,8 @@ public class CoverageUtils {
|
||||||
|
|
||||||
for (PileupElement e : context.getBasePileup()) {
|
for (PileupElement e : context.getBasePileup()) {
|
||||||
if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) {
|
if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) {
|
||||||
String sample = type == PartitionType.BY_SAMPLE ? e.getRead().getReadGroup().getSample() : e.getRead().getReadGroup().getReadGroupId();
|
String sample = type == PartitionType.BY_SAMPLE ? e.getRead().getReadGroup().getSample() :
|
||||||
|
e.getRead().getReadGroup().getSample()+"_rg_"+e.getRead().getReadGroup().getReadGroupId();
|
||||||
if ( samplesToCounts.keySet().contains(sample) ) {
|
if ( samplesToCounts.keySet().contains(sample) ) {
|
||||||
updateCounts(samplesToCounts.get(sample),e);
|
updateCounts(samplesToCounts.get(sample),e);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -58,10 +58,10 @@ public class CoverageStatisticsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
// now add the expected files that get generated
|
// now add the expected files that get generated
|
||||||
spec.addAuxFile("cb87d6069ac60c73f047efc6d9386619", baseOutputFile);
|
spec.addAuxFile("cb87d6069ac60c73f047efc6d9386619", baseOutputFile);
|
||||||
spec.addAuxFile("aff2349d6dc221c08f6c469379aeaedf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".interval_statistics"));
|
spec.addAuxFile("aff2349d6dc221c08f6c469379aeaedf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
|
||||||
spec.addAuxFile("6476ed0c54a4307a618aa6d3268b050f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".interval_summary"));
|
spec.addAuxFile("6476ed0c54a4307a618aa6d3268b050f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||||
spec.addAuxFile("c744a298b7541f3f823e6937e9a0bc67", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".locus_statistics"));
|
spec.addAuxFile("c744a298b7541f3f823e6937e9a0bc67", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_locus_statistics"));
|
||||||
spec.addAuxFile("65318c1e73d98a59cc6f817cde12d3d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".summary_statistics"));
|
spec.addAuxFile("65318c1e73d98a59cc6f817cde12d3d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary_statistics"));
|
||||||
spec.addAuxFile("9fc19f773a7ddfbb473d124e675a3d94", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
spec.addAuxFile("9fc19f773a7ddfbb473d124e675a3d94", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||||
execute("testBaseOutputNoFiltering",spec);
|
execute("testBaseOutputNoFiltering",spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue