diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index e35705eee..7538b8693 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -1,13 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Set; /** * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl @@ -17,8 +19,6 @@ import java.util.Map; */ public class CoverageUtils { - public enum PartitionType { BY_READ_GROUP, BY_SAMPLE } - /** * Returns the counts of bases from reads with MAPQ > minMapQ and base quality > minBaseQ in the context * as an array of ints, indexed by the index fields of BaseUtils @@ -40,21 +40,37 @@ public class CoverageUtils { return counts; } + public static String getTypeID( SAMRecord r, CoverageAggregator.AggregationType type ) { + if ( type == CoverageAggregator.AggregationType.SAMPLE ) { + return r.getReadGroup().getSample(); + } else if ( type == CoverageAggregator.AggregationType.READGROUP ) { + return String.format("%s_rg_%s",r.getReadGroup().getSample(),r.getReadGroup().getReadGroupId()); + } else if ( type == CoverageAggregator.AggregationType.LIBRARY ) { + return r.getReadGroup().getLibrary(); + } else { + throw new StingException("Invalid type ID sent to getTypeID. This is a BUG!"); + } + } + public static Map> + getBaseCountsByPartition(AlignmentContext context, int minMapQ, int minBaseQ, List types) { - public static Map getBaseCountsBySample(AlignmentContext context, int minMapQ, int minBaseQ, PartitionType type) { - Map samplesToCounts = new HashMap(); - + Map> countsByIDByType = new HashMap>(); + for (CoverageAggregator.AggregationType t : types ) { + countsByIDByType.put(t,new HashMap()); + } for (PileupElement e : context.getBasePileup()) { if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) { - String sample = type == PartitionType.BY_SAMPLE ? e.getRead().getReadGroup().getSample() : - e.getRead().getReadGroup().getSample()+"_rg_"+e.getRead().getReadGroup().getReadGroupId(); - if ( ! samplesToCounts.keySet().contains(sample) ) - samplesToCounts.put(sample,new int[6]); - updateCounts(samplesToCounts.get(sample),e); + for (CoverageAggregator.AggregationType t : types ) { + String id = getTypeID(e.getRead(),t); + if ( ! countsByIDByType.get(t).keySet().contains(id) ) { + countsByIDByType.get(t).put(id,new int[6]); + } + updateCounts(countsByIDByType.get(t).get(id),e); + } } } - return samplesToCounts; + return countsByIDByType; } private static void updateCounts(int[] counts, PileupElement e) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java index 70d1d0f26..6114a4024 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java @@ -84,6 +84,19 @@ public class DepthOfCoverageStats { totalDepthOfCoverage = 0; } + public DepthOfCoverageStats(DepthOfCoverageStats cloneMe) { + this.binLeftEndpoints = cloneMe.binLeftEndpoints; + this.granularHistogramBySample = cloneMe.granularHistogramBySample; + this.totalCoverages = cloneMe.totalCoverages; + this.nLoci = cloneMe.nLoci; + this.totalLocusDepth = cloneMe.totalLocusDepth; + this.totalDepthOfCoverage = cloneMe.totalDepthOfCoverage; + this.locusHistogram = cloneMe.locusHistogram; + this.locusCoverageCounts = cloneMe.locusCoverageCounts; + this.tabulateLocusCounts = cloneMe.tabulateLocusCounts; + this.includeDeletions = cloneMe.includeDeletions; + } + public void addSample(String sample) { if ( granularHistogramBySample.containsKey(sample) ) { return; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 9f2cadedc..d9e9fa584 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -67,7 +67,7 @@ import java.util.*; // todo -- allow for user to set linear binning (default is logarithmic) // todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now @By(DataSource.REFERENCE) -public class DepthOfCoverageWalker extends LocusWalker, CoverageAggregator> implements TreeReducible { +public class DepthOfCoverageWalker extends LocusWalker>, CoverageAggregator> implements TreeReducible { @Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false) int start = 1; @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) @@ -90,10 +90,8 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera boolean printBinEndpointsAndExit = false; @Argument(fullName = "omitPerSampleStats", shortName = "omitSampleSummary", doc = "Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime.", required = false) boolean omitSampleSummary = false; - @Argument(fullName = "useReadGroups", shortName = "rg", doc = "Split depth of coverage output by read group rather than by sample", required = false) - boolean useReadGroup = false; - @Argument(fullName = "useBothSampleAndReadGroup", shortName = "both", doc = "Split output by both read group and by sample", required = false) - boolean useBoth = false; + @Argument(fullName = "partitionType", shortName = "pt", doc = "Partition type for depth of coverage. Defaults to sample. Can be any combination of sample, readgroup, library.", required = false) + String[] partitionTypes = new String[] {"sample"}; @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) boolean includeDeletions = false; @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) @@ -104,7 +102,10 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera String outputFormat = "rtable"; String[] OUTPUT_FORMATS = {"table","rtable","csv"}; + String[] PARTITION_TYPES = {"sample","readgroup","library"}; String separator = "\t"; + List aggregationTypes = new ArrayList(); + Map> orderCheck = new HashMap>(); //////////////////////////////////////////////////////////////////////////////////// // STANDARD WALKER METHODS @@ -124,6 +125,7 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera System.exit(0); } + // Check the output format boolean goodOutputFormat = false; for ( String f : OUTPUT_FORMATS ) { goodOutputFormat = goodOutputFormat || f.equals(outputFormat); @@ -138,18 +140,33 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera separator = ","; } + // Check the partition types + for ( String t : partitionTypes ) { + boolean valid = false; + for ( String s : PARTITION_TYPES ) { + if ( s.equalsIgnoreCase(t) ) { + valid = true; + } + } + if ( ! valid ) { + throw new StingException("The partition type '"+t+"' is not a valid partition type. Please use any combination of 'sample','readgroup','library'."); + } else { + aggregationTypes.add(CoverageAggregator.typeStringToAggregationType(t)); + } + } + if ( getToolkit().getArguments().outFileName == null ) { logger.warn("This walker creates many output files from one input file; you may wish to specify an input file rather "+ "than defaulting all output to stdout."); } if ( ! omitDepthOutput ) { // print header - out.printf("%s\t%s\t%s","Locus","Total_Depth","Average_Depth"); - // get all the samples - HashSet allSamples = getSamplesFromToolKit(useReadGroup); - if ( useBoth ) { - allSamples.addAll(getSamplesFromToolKit(!useReadGroup)); + out.printf("%s\t%s","Locus","Total_Depth"); + for (CoverageAggregator.AggregationType type : aggregationTypes ) { + out.printf("\t%s_%s","Average_Depth",agTypeToString(type)); } + // get all the samples + HashSet allSamples = getSamplesFromToolKit(aggregationTypes); for ( String s : allSamples) { out.printf("\t%s_%s","Depth_for",s); @@ -163,78 +180,77 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera } else { out.printf("Per-Locus Depth of Coverage output was omitted"); } + + for (CoverageAggregator.AggregationType type : aggregationTypes ) { + orderCheck.put(type,new ArrayList()); + for ( String id : getSamplesFromToolKit(type) ) { + orderCheck.get(type).add(id); + } + Collections.sort(orderCheck.get(type)); + } } - private HashSet getSamplesFromToolKit( boolean getReadGroupsInstead ) { + private HashSet getSamplesFromToolKit( List types ) { HashSet partitions = new HashSet(); // since the DOCS object uses a HashMap, this will be in the same order - - if ( getReadGroupsInstead ) { - for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { - partitions.add(rg.getSample()+"_rg_"+rg.getReadGroupId()); - } - } else { - for ( Set sampleSet : getToolkit().getSamplesByReaders()) { - for (String s : sampleSet) { - partitions.add(s); - } - } + for (CoverageAggregator.AggregationType t : types ) { + partitions.addAll(getSamplesFromToolKit(t)); } return partitions; } + private HashSet getSamplesFromToolKit(CoverageAggregator.AggregationType type) { + HashSet partition = new HashSet(); + if ( type == CoverageAggregator.AggregationType.SAMPLE ) { + for ( Set sampleSet : getToolkit().getSamplesByReaders() ) { + for ( String s : sampleSet ) { + partition.add(s); + } + } + } else if ( type == CoverageAggregator.AggregationType.READGROUP ) { + for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { + partition.add(rg.getSample()+"_rg_"+rg.getReadGroupId()); + } + } else if ( type == CoverageAggregator.AggregationType.LIBRARY ) { + for ( Set libraries : getToolkit().getLibrariesByReaders() ) { + for ( String l : libraries ) { + partition.add(l); + } + } + } else { + throw new StingException("Invalid aggregation type sent to getSamplesFromToolKit"); + } + + return partition; + } + public boolean isReduceByInterval() { return ( ! omitIntervals ); } public CoverageAggregator reduceInit() { - - CoverageAggregator.AggregationType agType; - if ( useBoth ) { - agType = CoverageAggregator.AggregationType.BOTH; - } else { - agType = useReadGroup ? CoverageAggregator.AggregationType.READ : CoverageAggregator.AggregationType.SAMPLE; + CoverageAggregator aggro = new CoverageAggregator(aggregationTypes,start,stop,nBins); + for (CoverageAggregator.AggregationType t : aggregationTypes ) { + aggro.addIdentifiers(t,getSamplesFromToolKit(t)); } - - CoverageAggregator aggro = new CoverageAggregator(agType,start,stop,nBins); - - if ( ! useReadGroup || useBoth ) { - aggro.addSamples(getSamplesFromToolKit(false)); - } - - if ( useReadGroup || useBoth ) { - aggro.addReadGroups(getSamplesFromToolKit(true)); - } - aggro.initialize(includeDeletions,omitLocusTable); - + checkOrder(aggro); return aggro; } - public Map map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public Map> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( ! omitDepthOutput ) { out.printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives) //System.out.printf("\t[log]\t%s",ref.getLocus()); } - Map countsBySample = CoverageUtils.getBaseCountsBySample(context,minMappingQuality,minBaseQuality, - useReadGroup ? CoverageUtils.PartitionType.BY_READ_GROUP : CoverageUtils.PartitionType.BY_SAMPLE); - - if ( useBoth ) { - Map countsByOther = CoverageUtils.getBaseCountsBySample(context,minMappingQuality,minBaseQuality, - !useReadGroup ? CoverageUtils.PartitionType.BY_READ_GROUP : CoverageUtils.PartitionType.BY_SAMPLE); - for ( String s : countsByOther.keySet()) { - countsBySample.put(s,countsByOther.get(s)); - } - } - - return countsBySample; + return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,minBaseQuality,aggregationTypes); } - public CoverageAggregator reduce(Map thisMap, CoverageAggregator prevReduce) { + public CoverageAggregator reduce(Map> thisMap, CoverageAggregator prevReduce) { if ( ! omitDepthOutput ) { - printDepths(out,thisMap, prevReduce.getAllSamples()); + printDepths(out,thisMap, prevReduce.getIdentifiersByType()); // this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without // turning on omit } @@ -254,20 +270,26 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera //////////////////////////////////////////////////////////////////////////////////// public void onTraversalDone( List> statsByInterval ) { - if ( refSeqGeneList != null && ( useBoth || ! useReadGroup ) ) { + if ( refSeqGeneList != null && aggregationTypes.contains(CoverageAggregator.AggregationType.SAMPLE ) ) { printGeneStats(statsByInterval); } - if ( useBoth || ! useReadGroup ) { + if ( aggregationTypes.contains(CoverageAggregator.AggregationType.SAMPLE) ) { File intervalStatisticsFile = deriveFromStream("sample_interval_statistics"); File intervalSummaryFile = deriveFromStream("sample_interval_summary"); - printIntervalStats(statsByInterval,intervalSummaryFile, intervalStatisticsFile, true); + printIntervalStats(statsByInterval,intervalSummaryFile, intervalStatisticsFile, CoverageAggregator.AggregationType.SAMPLE ); } - if ( useBoth || useReadGroup ) { + if ( aggregationTypes.contains(CoverageAggregator.AggregationType.READGROUP ) ) { File intervalStatisticsFile = deriveFromStream("read_group_interval_statistics"); File intervalSummaryFile = deriveFromStream("read_group_interval_summary"); - printIntervalStats(statsByInterval,intervalSummaryFile, intervalStatisticsFile, false); + printIntervalStats(statsByInterval,intervalSummaryFile, intervalStatisticsFile, CoverageAggregator.AggregationType.READGROUP); + } + + if ( aggregationTypes.contains(CoverageAggregator.AggregationType.LIBRARY) ) { + File intervalStatisticsFile = deriveFromStream("library_interval_statistics"); + File intervalSummaryFile = deriveFromStream("library_interval_summary"); + printIntervalStats(statsByInterval,intervalSummaryFile,intervalStatisticsFile,CoverageAggregator.AggregationType.LIBRARY); } onTraversalDone(mergeAll(statsByInterval)); @@ -283,7 +305,7 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera return first; } - private DepthOfCoverageStats printIntervalStats(List> statsByInterval, File summaryFile, File statsFile, boolean isSample) { + private DepthOfCoverageStats printIntervalStats(List> statsByInterval, File summaryFile, File statsFile, CoverageAggregator.AggregationType type) { PrintStream summaryOut; PrintStream statsOut; @@ -296,7 +318,7 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera Pair firstPair = statsByInterval.get(0); CoverageAggregator firstAggregator = firstPair.second; - DepthOfCoverageStats firstStats = isSample ? firstAggregator.getCoverageBySample() : firstAggregator.getCoverageByReadGroup(); + DepthOfCoverageStats firstStats = firstAggregator.getCoverageByAggregationType(type); StringBuilder summaryHeader = new StringBuilder(); summaryHeader.append("Target"); @@ -330,8 +352,7 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera for ( Pair targetAggregator : statsByInterval ) { Pair targetStats = new Pair( - targetAggregator.first, isSample ? targetAggregator.second.getCoverageBySample() : - targetAggregator.second.getCoverageByReadGroup()); + targetAggregator.first, targetAggregator.second.getCoverageByAggregationType(type)); printTargetSummary(summaryOut,targetStats); updateTargetTable(nTargetsByAvgCvgBySample,targetStats.second); } @@ -354,10 +375,10 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera for ( Pair targetStats : statsByTarget ) { String gene = getGeneName(targetStats.first,refseqIterator); if ( geneNamesToStats.keySet().contains(gene) ) { - geneNamesToStats.get(gene).merge(targetStats.second.getCoverageBySample()); + geneNamesToStats.get(gene).merge(targetStats.second.getCoverageByAggregationType(CoverageAggregator.AggregationType.SAMPLE)); } else { - geneNamesToStats.put(gene,targetStats.second.getCoverageBySample()); - statsByGene.add(new Pair(gene,targetStats.second.getCoverageBySample())); + geneNamesToStats.put(gene,targetStats.second.getCoverageByAggregationType(CoverageAggregator.AggregationType.SAMPLE)); + statsByGene.add(new Pair(gene,new DepthOfCoverageStats(targetStats.second.getCoverageByAggregationType(CoverageAggregator.AggregationType.SAMPLE)))); } } @@ -498,37 +519,44 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera if ( ! omitSampleSummary ) { logger.info("Printing summary info"); - if ( ! useReadGroup || useBoth ) { - File summaryStatisticsFile = deriveFromStream("sample_summary_statistics"); - File perSampleStatisticsFile = deriveFromStream("sample_statistics"); - printSummary(out,summaryStatisticsFile,coverageProfiles.getCoverageBySample()); - printPerSample(out,perSampleStatisticsFile,coverageProfiles.getCoverageBySample()); - } - - if ( useReadGroup || useBoth ) { - File rgStatsFile = deriveFromStream("read_group_summary"); - File rgSumFile = deriveFromStream("read_group_statistics"); - printSummary(out,rgStatsFile,coverageProfiles.getCoverageByReadGroup()); - printPerSample(out,rgSumFile,coverageProfiles.getCoverageByReadGroup()); + for (CoverageAggregator.AggregationType type : aggregationTypes ) { + outputSummaryFiles(coverageProfiles,type); } } if ( ! omitLocusTable ) { logger.info("Printing locus summary"); - if ( ! useReadGroup || useBoth ) { - File perLocusStatisticsFile = deriveFromStream("sample_locus_statistics"); - File perLocusCoverageFile = deriveFromStream("sample_coverage_statistics"); - printPerLocus(perLocusStatisticsFile,perLocusCoverageFile,coverageProfiles.getCoverageBySample(),false); - } - - if ( useReadGroup || useBoth ) { - File perLocusRGStats = deriveFromStream("read_group_locus_statistics"); - File perLocusRGCoverage = deriveFromStream("read_group_locus_coverage"); - printPerLocus(perLocusRGStats,perLocusRGCoverage,coverageProfiles.getCoverageByReadGroup(),true); + for (CoverageAggregator.AggregationType type : aggregationTypes ) { + outputLocusFiles(coverageProfiles,type); } } } + private String agTypeToString(CoverageAggregator.AggregationType type) { + if ( type == CoverageAggregator.AggregationType.SAMPLE ) { + return "sample"; + } else if ( type == CoverageAggregator.AggregationType.READGROUP ) { + return "read_group"; + } else if ( type == CoverageAggregator.AggregationType.LIBRARY ) { + return "library"; + } else { + throw new StingException("Invalid aggregation type "+type+" sent to agTypeToString. This is a BUG!"); + } + } + + private void outputLocusFiles(CoverageAggregator coverageProfiles, CoverageAggregator.AggregationType type ) { + File locusStats = deriveFromStream(agTypeToString(type)+"_cumulative_coverage_counts"); + File coverageStats = deriveFromStream(agTypeToString(type)+"_cumulative_coverage_proportions"); + printPerLocus(locusStats,coverageStats,coverageProfiles.getCoverageByAggregationType(type),agTypeToString(type)); + } + + private void outputSummaryFiles(CoverageAggregator coverageProfiles, CoverageAggregator.AggregationType type ) { + File summaryFile = deriveFromStream(agTypeToString(type)+"_summary"); + File statsFile = deriveFromStream(agTypeToString(type)+"_statistics"); + printPerSample(out,statsFile,coverageProfiles.getCoverageByAggregationType(type)); + printSummary(out,summaryFile,coverageProfiles.getCoverageByAggregationType(type)); + } + public File deriveFromStream(String append) { String name = getToolkit().getArguments().outFileName; if ( name == null || name.contains("stdout") || name.contains("Stdout") || name.contains("STDOUT")) { @@ -568,7 +596,7 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera } } - private void printPerLocus(File locusFile, File coverageFile, DepthOfCoverageStats stats, boolean isRG) { + private void printPerLocus(File locusFile, File coverageFile, DepthOfCoverageStats stats, String partitionType) { PrintStream output = getCorrectStream(out,locusFile); PrintStream coverageOut = getCorrectStream(out,coverageFile); if ( output == null ) { @@ -587,11 +615,7 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera StringBuilder header = new StringBuilder(); if ( printSampleColumnHeader ) { - if ( isRG ) { - header.append("ReadGroup"); - } else { - header.append("Sample"); - } + header.append(partitionType); } header.append(String.format("%sgte_0",separator)); for ( int d : endpoints ) { @@ -689,24 +713,34 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera return bin; } - private void printDepths(PrintStream stream, Map countsBySample, Set allSamples) { + private void printDepths(PrintStream stream, Map> countsBySampleByType, Map> identifiersByType) { // get the depths per sample and build up the output string while tabulating total and average coverage - // todo -- update me to deal with base counts/indels StringBuilder perSampleOutput = new StringBuilder(); int tDepth = 0; - for ( String s : allSamples ) { - perSampleOutput.append(separator); - long dp = countsBySample.keySet().contains(s) ? sumArray(countsBySample.get(s)) : 0; - perSampleOutput.append(dp); - if ( printBaseCounts ) { + boolean depthCounted = false; + for (CoverageAggregator.AggregationType type : aggregationTypes ) { + Map countsByID = countsBySampleByType.get(type); + for ( String s : identifiersByType.get(type) ) { perSampleOutput.append(separator); - perSampleOutput.append(baseCounts(countsBySample.get(s))); + long dp = countsByID.keySet().contains(s) ? sumArray(countsByID.get(s)) : 0 ; + perSampleOutput.append(dp); + if ( printBaseCounts ) { + perSampleOutput.append(separator); + perSampleOutput.append(baseCounts(countsByID.get(s))); + } + if ( ! depthCounted ) { + tDepth += dp; + } } - tDepth += dp; + depthCounted = true; // only sum the total depth once } + // remember -- genome locus was printed in map() - stream.printf("%s%d%s%.2f%s%n",separator,tDepth,separator,( (double) tDepth/ (double) allSamples.size()), - perSampleOutput); + stream.printf("%s%d",separator,tDepth); + for (CoverageAggregator.AggregationType type : aggregationTypes ) { + stream.printf("%s%.2f",separator, ( (double) tDepth / identifiersByType.get(type).size() ) ); + } + stream.printf("%s%n",perSampleOutput); } private long sumArray(int[] array) { @@ -737,115 +771,99 @@ public class DepthOfCoverageWalker extends LocusWalker, Covera return s.toString(); } + + private void checkOrder(CoverageAggregator ag) { + for (CoverageAggregator.AggregationType t : aggregationTypes ) { + List order = orderCheck.get(t); + List namesInAg = ag.getIdentifiersByType().get(t); + Set namesInDOCS = ag.getCoverageByAggregationType(t).getAllSamples(); + int index = 0; + for ( String s : namesInAg ) { + if ( ! s.equalsIgnoreCase(order.get(index)) ) { + throw new StingException("IDs are out of order for type "+t+"! Aggregator has different ordering"); + } + index++; + } + } + } } class CoverageAggregator { - private DepthOfCoverageStats coverageByRead; - private DepthOfCoverageStats coverageBySample; - enum AggregationType { READ, SAMPLE, BOTH } + enum AggregationType { READGROUP, SAMPLE, LIBRARY } - private AggregationType agType; - private Set sampleNames; - private Set allSamples; - - public CoverageAggregator(AggregationType type, int start, int stop, int nBins) { - if ( type == AggregationType.READ || type == AggregationType.BOTH) { - coverageByRead = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins)); + private List types; + private Map coverageProfiles; + private Map> identifiersByType; + private Set allIdentifiers; + public CoverageAggregator(List typesToUse, int start, int stop, int nBins) { + coverageProfiles = new HashMap(); + identifiersByType = new HashMap>(); + types = typesToUse; + for ( AggregationType type : types ) { + coverageProfiles.put(type,new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins))); + identifiersByType.put(type,new ArrayList()); } - - if ( type == AggregationType.SAMPLE || type == AggregationType.BOTH) { - coverageBySample = new DepthOfCoverageStats(DepthOfCoverageStats.calculateBinEndpoints(start,stop,nBins)); - } - - agType = type; - allSamples = new HashSet(); + allIdentifiers = new HashSet(); } public void merge(CoverageAggregator otherAggregator) { - if ( agType == AggregationType.SAMPLE || agType == AggregationType.BOTH ) { - this.coverageBySample.merge(otherAggregator.coverageBySample); - } - - if ( agType == AggregationType.READ || agType == AggregationType.BOTH) { - this.coverageByRead.merge(otherAggregator.coverageByRead); + for ( AggregationType type : types ) { + this.coverageProfiles.get(type).merge(otherAggregator.coverageProfiles.get(type)); } } - public DepthOfCoverageStats getCoverageByReadGroup() { - return coverageByRead; + public DepthOfCoverageStats getCoverageByAggregationType(AggregationType t) { + return coverageProfiles.get(t); } - public DepthOfCoverageStats getCoverageBySample() { - return coverageBySample; - } - - public void addSamples(Set samples) { - for ( String s : samples ) { - coverageBySample.addSample(s); - allSamples.add(s); - } - - if ( agType == AggregationType.BOTH ) { - sampleNames = samples; + public void addIdentifiers(AggregationType t, Set ids) { + for ( String s : ids ) { + coverageProfiles.get(t).addSample(s); + identifiersByType.get(t).add(s); + allIdentifiers.add(s); } + Collections.sort(identifiersByType.get(t)); } - public void addReadGroups(Set readGroupNames) { - for ( String s : readGroupNames ) { - coverageByRead.addSample(s); - allSamples.add(s); - } - } - - public void initialize(boolean useDels, boolean omitLocusTable) { - if ( agType == AggregationType.SAMPLE || agType == AggregationType.BOTH ) { + for ( AggregationType t : types ) { if ( useDels ) { - coverageBySample.initializeDeletions(); + coverageProfiles.get(t).initializeDeletions(); } - if ( ! omitLocusTable ) { - coverageBySample.initializeLocusCounts(); - } - } - - if ( agType == AggregationType.READ || agType == AggregationType.BOTH) { - if ( useDels ) { - coverageByRead.initializeDeletions(); - } - - if ( ! omitLocusTable ) { - coverageByRead.initializeLocusCounts(); + coverageProfiles.get(t).initializeLocusCounts(); } } } - public void update(Map countsByIdentifier) { - if ( agType != AggregationType.BOTH ) { - if ( agType == AggregationType.SAMPLE ) { - coverageBySample.update(countsByIdentifier); - } else { - coverageByRead.update(countsByIdentifier); - } - } else { // have to separate samples and read groups - HashMap countsBySample = new HashMap(sampleNames.size()); - HashMap countsByRG = new HashMap(allSamples.size()-sampleNames.size()); - for ( String s : countsByIdentifier.keySet() ) { - if ( sampleNames.contains(s) ) { - countsBySample.put(s,countsByIdentifier.get(s)); - } else { // cannot use .remove() to save time due to concurrency issues - countsByRG.put(s,countsByIdentifier.get(s)); - } - } - - coverageBySample.update(countsBySample); - coverageByRead.update(countsByRG); + public void update(Map> countsByIdentifierByType) { + for ( AggregationType t : types ) { + coverageProfiles.get(t).update(countsByIdentifierByType.get(t)); } } - public Set getAllSamples() { - return allSamples; + public Set getAllIdentifiers() { + return allIdentifiers; } + public Map> getIdentifiersByType() { + return identifiersByType; + } + + public static AggregationType typeStringToAggregationType(String type) { + if ( type.equals("sample") ) { + return AggregationType.SAMPLE; + } + + if ( type.equals("library") ) { + return AggregationType.LIBRARY; + } + + if ( type.equals("readgroup") ) { + return AggregationType.READGROUP; + } + + throw new StingException("Valid partition type string "+type+" is not associated with an aggregation type. This is a BUG!"); + } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java index 9427daab1..d6c43e5a8 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java @@ -53,21 +53,29 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { String[] intervals = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/fhs_jhs_30_targts.interval_list"}; String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/FHS_indexed_subset.bam"}; - String cmd = buildRootCmd(hg18,new ArrayList(Arrays.asList(bams)),new ArrayList(Arrays.asList(intervals))) + " -mmq 0 -mbq 0 -dels -baseCounts -both --outputFormat csv"; + String cmd = buildRootCmd(hg18,new ArrayList(Arrays.asList(bams)),new ArrayList(Arrays.asList(intervals))) + " -mmq 0 -mbq 0 -dels -baseCounts -pt readgroup -pt sample -pt library --outputFormat csv"; WalkerTestSpec spec = new WalkerTestSpec(cmd,0, new ArrayList()); // now add the expected files that get generated - spec.addAuxFile("f53ddf25c2b71e46381f9c434402d88d", baseOutputFile); - spec.addAuxFile("925cc5b49286e0222bce6251d1baafc7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); - spec.addAuxFile("d9e812398d778f28ed12d7f3d18628e2", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); - spec.addAuxFile("80577bf378de570f84d91b0ef6004102", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_locus_statistics")); - spec.addAuxFile("3a059ad82d945643dc4e03239c4041f5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary_statistics")); - spec.addAuxFile("f3315551081331bc322c53b11412d707", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("fc742e346be2344557cf8c039f467508", baseOutputFile); + spec.addAuxFile("e58b701b01ec0dbe75c146295434ba3b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts")); + spec.addAuxFile("b9a7748e5aec4dc06daed893c901c00d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); + spec.addAuxFile("848e556ec7e03e9b0398d189d7cbb4ad", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics")); + spec.addAuxFile("e6fc8f7a5fcc440e21de5891f3403d5d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); + spec.addAuxFile("cac8e7a688d9bbe781232c61091d3237", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); + spec.addAuxFile("e452dfb5581a7aafaf2122c5ae497f1b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); + spec.addAuxFile("38fb89e1bb52d0342f97f72e86956b79", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts")); + spec.addAuxFile("f9f2941ee39577ac2f80668e7f6b3d4b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics")); spec.addAuxFile("fd29fe0c14351e934a6fef9df1f38f7b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); - spec.addAuxFile("111261f0e8ccf8c456d0b2a9482bc32c", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_locus_statistics")); spec.addAuxFile("cc7ee5075a932dba486e78824ca34202", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); spec.addAuxFile("e1653480daa2d96f7c584ed4cd20c147", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); + spec.addAuxFile("529353375d23c529228b38119c51e269", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); + spec.addAuxFile("650ee3714da7fbad7832c9d4ad49eb51", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); + spec.addAuxFile("925cc5b49286e0222bce6251d1baafc7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); + spec.addAuxFile("d9e812398d778f28ed12d7f3d18628e2", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); + spec.addAuxFile("f3315551081331bc322c53b11412d707", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("3a059ad82d945643dc4e03239c4041f5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); execute("testBaseOutputNoFiltering",spec); }