From a88202c3f66d38764fd98efdae44562d5d731e17 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 3 Dec 2009 05:28:21 +0000 Subject: [PATCH] Refactored DoCWalker to output in a more helpful and usable style. It now outputs in tabular format with 2 different sections: per locus and then per interval. I am now at a point where I can merge the functionality from other coverage walkers into this one. Thanks to Andrew for input. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2239 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/DepthOfCoverageWalker.java | 216 ++++++++++++------ .../DepthOfCoverageIntegrationTest.java | 18 +- 2 files changed, 159 insertions(+), 75 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java index 060374fad..6bd92776e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java @@ -28,7 +28,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.*; import net.sf.samtools.SAMReadGroupRecord; @@ -38,7 +38,7 @@ import java.util.*; * Display the depth of coverage at a given locus. */ @By(DataSource.REFERENCE) -public class DepthOfCoverageWalker extends LocusWalker> { +public class DepthOfCoverageWalker extends LocusWalker { enum printType { NONE, COMPACT, @@ -48,10 +48,7 @@ public class DepthOfCoverageWalker extends LocusWalker @Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false) protected printType printStyle = printType.COMPACT; - @Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false) - protected boolean excludeDeletionsInCoverage = false; - - @Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false) + @Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will also list read counts with MAPQ >= this value at a locus in coverage",required=false) protected int excludeMAPQBelowThis = -1; @Argument(fullName="byReadGroup", shortName="byRG", doc="List read depths for each read group") @@ -60,23 +57,13 @@ public class DepthOfCoverageWalker extends LocusWalker @Argument(fullName="bySample", shortName="bySample", doc="List read depths for each sample") protected boolean bySample = false; - + // keep track of the read group and sample names private TreeSet readGroupNames = new TreeSet(); private TreeSet sampleNames = new TreeSet(); - - - public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; } + public boolean includeReadsWithDeletionAtLoci() { return true; } public void initialize() { - switch ( printStyle ) { - case COMPACT: - out.printf("locus depth%n"); - break; - case DETAILED: - out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n"); - break; - } if ( byReadGroup ) { List readGroups = this.getToolkit().getSAMFileHeader().getReadGroups(); @@ -92,24 +79,41 @@ public class DepthOfCoverageWalker extends LocusWalker sampleNames.add(sample); } } + + StringBuilder header = new StringBuilder("location\ttotal_coverage\tcoverage_without_deletions"); + if ( excludeMAPQBelowThis > 0 ) { + header.append("\tcoverage_atleast_MQ"); + header.append(excludeMAPQBelowThis); + } + if ( byReadGroup ) { + for ( String rg : readGroupNames ) { + header.append("\tcoverage_for_"); + header.append(rg); + } + } + if ( bySample ) { + for ( String sample : sampleNames ) { + header.append("\tcoverage_for_"); + header.append(sample); + } + } + out.println(header.toString()); } - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0; - - HashMap depthByReadGroup = new HashMap(); - for ( String readGroupName : readGroupNames ) - depthByReadGroup.put(readGroupName, 0); - HashMap depthBySample = new HashMap(); - for ( String sample : sampleNames ) - depthBySample.put(sample, 0); + public DoCInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { ReadBackedPileup pileup = context.getPileup(); + + DoCInfo info = new DoCInfo(); + info.totalCoverage = pileup.size(); + + int nBadMAPQReads = 0, nDeletionReads = 0; for ( PileupElement p : pileup ) { - if ( p.getRead().getMappingQuality() < excludeMAPQBelowThis ) nBadMAPQReads++; - else if ( p.isDeletion() ) nDeletionReads++; - else nCleanReads++; + if ( excludeMAPQBelowThis > 0 && p.getRead().getMappingQuality() < excludeMAPQBelowThis ) + nBadMAPQReads++; + else if ( p.isDeletion() ) + nDeletionReads++; SAMReadGroupRecord readGroup = p.getRead().getReadGroup(); if ( readGroup == null ) @@ -117,59 +121,143 @@ public class DepthOfCoverageWalker extends LocusWalker if ( byReadGroup ) { String readGroupName = readGroup.getReadGroupId(); - int oldDepth = depthByReadGroup.get(readGroupName); - depthByReadGroup.put(readGroupName, oldDepth + 1); + int oldDepth = info.depthByReadGroup.get(readGroupName); + info.depthByReadGroup.put(readGroupName, oldDepth + 1); } if ( bySample ) { String sample = readGroup.getSample(); if ( sample != null ) { - int oldDepth = depthBySample.get(sample); - depthBySample.put(sample, oldDepth + 1); + int oldDepth = info.depthBySample.get(sample); + info.depthBySample.put(sample, oldDepth + 1); } } } - int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads); + info.numDeletions = nDeletionReads; + if ( excludeMAPQBelowThis > 0 ) + info.numBadMQReads = nBadMAPQReads; - switch ( printStyle ) { - case COMPACT: - out.printf("%s %8d%n", context.getLocation(), nTotalReads); - break; - case DETAILED: - out.printf("%s %8d %8d %8d %8d%n", context.getLocation(), nTotalReads, nCleanReads, nDeletionReads, nBadMAPQReads); - break; - } + printDoCInfo(context.getLocation(), info, false); - if ( byReadGroup ) { - for ( String rg : readGroupNames ) { - out.printf(" %s %8d%n", rg, depthByReadGroup.get(rg)); - } - } - - if ( bySample ) { - for ( String sample : sampleNames ) { - out.printf(" %s %8d%n", sample, depthBySample.get(sample)); - } - } - - return nTotalReads; + return info; } public boolean isReduceByInterval() { return true; } - public Pair reduceInit() { return new Pair(0l,0l); } + public DoCInfo reduceInit() { return new DoCInfo(); } - public Pair reduce(Integer value, Pair sum) { - long left = value.longValue() + sum.getFirst(); - long right = sum.getSecond() + 1l; - return new Pair(left, right); + public DoCInfo reduce(DoCInfo value, DoCInfo sum) { + sum.totalCoverage += value.totalCoverage; + sum.numDeletions += value.numDeletions; + sum.numBadMQReads += value.numBadMQReads; + if ( byReadGroup ) { + for ( String rg : readGroupNames ) { + int oldDepth = sum.depthByReadGroup.get(rg); + sum.depthByReadGroup.put(rg, oldDepth + value.depthByReadGroup.get(rg)); + } + } + if ( bySample ) { + for ( String sample : sampleNames ) { + int oldDepth = sum.depthBySample.get(sample); + sum.depthBySample.put(sample, oldDepth + value.depthBySample.get(sample)); + } + } + + return sum; } - public void onTraversalDone(Pair result) { - out.printf("Average depth of coverage is: %.2f in %d total coverage over %d sites\n", - ((double)result.getFirst() / result.getSecond()), result.getFirst(), result.getSecond()); + @Override + public void onTraversalDone(List> results) { + + StringBuilder header = new StringBuilder("\nlocation\ttotal_coverage\taverage_coverage\tcoverage_without_deletions\taverage_coverage_without_deletions"); + if ( excludeMAPQBelowThis > 0 ) { + header.append("\tcoverage_atleast_MQ"); + header.append(excludeMAPQBelowThis); + header.append("\taverage_coverage_atleast_MQ"); + header.append(excludeMAPQBelowThis); + } + if ( byReadGroup ) { + for ( String rg : readGroupNames ) { + header.append("\tcoverage_for_"); + header.append(rg); + } + } + if ( bySample ) { + for ( String sample : sampleNames ) { + header.append("\tcoverage_for_"); + header.append(sample); + } + } + out.println(header.toString()); + + for ( Pair result : results ) + printDoCInfo(result.first, result.second, true); + } + + private void printDoCInfo(GenomeLoc loc, DoCInfo info, boolean printAverageCoverage) { + + StringBuilder sb = new StringBuilder(); + sb.append(loc); + sb.append("\t"); + sb.append(info.totalCoverage); + sb.append("\t"); + if ( printAverageCoverage ) { + sb.append(String.format("%.2f", ((double)info.totalCoverage) / (double)(loc.getStop() - loc.getStart() + 1))); + sb.append("\t"); + } + sb.append((info.totalCoverage - info.numDeletions)); + if ( printAverageCoverage ) { + sb.append("\t"); + sb.append(String.format("%.2f", ((double)(info.totalCoverage - info.numDeletions)) / (double)(loc.getStop() - loc.getStart() + 1))); + } + if ( excludeMAPQBelowThis > 0 ) { + sb.append("\t"); + sb.append((info.totalCoverage - info.numBadMQReads)); + if ( printAverageCoverage ) { + sb.append("\t"); + sb.append(String.format("%.2f", ((double)(info.totalCoverage - info.numBadMQReads)) / (double)(loc.getStop() - loc.getStart() + 1))); + } + } + if ( byReadGroup ) { + for ( String rg : readGroupNames ) { + sb.append("\t"); + sb.append(String.format("%8d", info.depthByReadGroup.get(rg))); + } + } + + if ( bySample ) { + for ( String sample : sampleNames ) { + sb.append("\t"); + sb.append(String.format("%8d", info.depthBySample.get(sample))); + } + } + + out.println(sb.toString()); + } + + public class DoCInfo { + public int totalCoverage = 0; + public int numDeletions = 0; + public int numBadMQReads = 0; + + public HashMap depthByReadGroup = null; + public HashMap depthBySample = null; + + public DoCInfo() { + if ( byReadGroup ) { + depthByReadGroup = new HashMap(); + for ( String readGroupName : readGroupNames ) + depthByReadGroup.put(readGroupName, 0); + } + if ( bySample ) { + depthBySample = new HashMap(); + for ( String sample : sampleNames ) + depthBySample.put(sample, 0); + } + } + } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java index ba5cbda0b..ab47ed0b7 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java @@ -13,15 +13,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { private static String root = "-L 1:10,164,500-10,164,520 -R /broad/1KG/reference/human_b36_both.fasta -T DepthOfCoverage -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam"; static HashMap expectations = new HashMap(); static { - expectations.put("-s COMPACT -minMAPQ 1", "a6b2005e37f48545e9604e0f809cf618"); - expectations.put("-s COMPACT", "c3cd44717487ad3c7d6d969583d9bb8c"); - expectations.put("-s COMPACT -ed", "73aacc9febea393a4c3760bd6c70a557"); - expectations.put("-s NONE", "628201adcef01e8bc3e9cdf96b86003b"); - expectations.put("-s NONE -minMAPQ 1", "2425ccbcfdfb9e9a98fcf30c6f2bfcdd"); - expectations.put("-s NONE -minMAPQ 100", "09eaf5a4c811e4ac74de5d179a9ffbe7"); - expectations.put("-s DETAILED -minMAPQ 1", "9059436a57f8c28ef16ee9b2c7cd3ebb"); - expectations.put("-s DETAILED", "fac3bb643f34eb9d90dac0e0358b55ab"); - expectations.put("-s DETAILED -ed", "4d991221aa5e206de11878e88c380de1"); + expectations.put("-minMAPQ 1", "59c6071105a598e19f460640c35768c6"); + expectations.put("-minMAPQ 100", "e997fb5d61eaec21518722b0de90af20"); + expectations.put("-bySample", "160ffa185dbfa8b0d2dc57f60f5b1e48"); + expectations.put("-byRG", "dd3b4d040df7325dad4760ac6fa5252d"); + expectations.put("-minMAPQ 1 -bySample -byRG", "bd2a07ef548b86e82ac6cce534225612"); } @Test @@ -41,9 +37,9 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { @Test public void testDepthOfCoverage454() { WalkerTestSpec spec = new WalkerTestSpec( - "-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -s DETAILED -o %s", + "-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -o %s", 1, // just one output file - Arrays.asList("5c94916ec193ca825c681dd0175c6164")); + Arrays.asList("51203ba5ab928449cd01363af0b91510")); executeTest("testDepthOfCoverage454", spec); } } \ No newline at end of file