Refactored DoCWalker to output in a more helpful and usable style. It now outputs in tabular format with 2 different sections: per locus and then per interval.

I am now at a point where I can merge the functionality from other coverage walkers into this one.
Thanks to Andrew for input.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2239 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-03 05:28:21 +00:00
parent d7e4cd4c82
commit a88202c3f6
2 changed files with 159 additions and 75 deletions

View File

@ -28,7 +28,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import net.sf.samtools.SAMReadGroupRecord;
@ -38,7 +38,7 @@ import java.util.*;
* Display the depth of coverage at a given locus.
*/
@By(DataSource.REFERENCE)
public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>> {
public class DepthOfCoverageWalker extends LocusWalker<DepthOfCoverageWalker.DoCInfo, DepthOfCoverageWalker.DoCInfo> {
enum printType {
NONE,
COMPACT,
@ -48,10 +48,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
protected printType printStyle = printType.COMPACT;
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
protected boolean excludeDeletionsInCoverage = false;
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will also list read counts with MAPQ >= this value at a locus in coverage",required=false)
protected int excludeMAPQBelowThis = -1;
@Argument(fullName="byReadGroup", shortName="byRG", doc="List read depths for each read group")
@ -60,23 +57,13 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
@Argument(fullName="bySample", shortName="bySample", doc="List read depths for each sample")
protected boolean bySample = false;
// keep track of the read group and sample names
private TreeSet<String> readGroupNames = new TreeSet<String>();
private TreeSet<String> sampleNames = new TreeSet<String>();
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
public boolean includeReadsWithDeletionAtLoci() { return true; }
public void initialize() {
switch ( printStyle ) {
case COMPACT:
out.printf("locus depth%n");
break;
case DETAILED:
out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
break;
}
if ( byReadGroup ) {
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
@ -92,24 +79,41 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
sampleNames.add(sample);
}
}
StringBuilder header = new StringBuilder("location\ttotal_coverage\tcoverage_without_deletions");
if ( excludeMAPQBelowThis > 0 ) {
header.append("\tcoverage_atleast_MQ");
header.append(excludeMAPQBelowThis);
}
if ( byReadGroup ) {
for ( String rg : readGroupNames ) {
header.append("\tcoverage_for_");
header.append(rg);
}
}
if ( bySample ) {
for ( String sample : sampleNames ) {
header.append("\tcoverage_for_");
header.append(sample);
}
}
out.println(header.toString());
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
HashMap<String, Integer> depthByReadGroup = new HashMap<String, Integer>();
for ( String readGroupName : readGroupNames )
depthByReadGroup.put(readGroupName, 0);
HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
for ( String sample : sampleNames )
depthBySample.put(sample, 0);
public DoCInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadBackedPileup pileup = context.getPileup();
DoCInfo info = new DoCInfo();
info.totalCoverage = pileup.size();
int nBadMAPQReads = 0, nDeletionReads = 0;
for ( PileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() < excludeMAPQBelowThis ) nBadMAPQReads++;
else if ( p.isDeletion() ) nDeletionReads++;
else nCleanReads++;
if ( excludeMAPQBelowThis > 0 && p.getRead().getMappingQuality() < excludeMAPQBelowThis )
nBadMAPQReads++;
else if ( p.isDeletion() )
nDeletionReads++;
SAMReadGroupRecord readGroup = p.getRead().getReadGroup();
if ( readGroup == null )
@ -117,59 +121,143 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
if ( byReadGroup ) {
String readGroupName = readGroup.getReadGroupId();
int oldDepth = depthByReadGroup.get(readGroupName);
depthByReadGroup.put(readGroupName, oldDepth + 1);
int oldDepth = info.depthByReadGroup.get(readGroupName);
info.depthByReadGroup.put(readGroupName, oldDepth + 1);
}
if ( bySample ) {
String sample = readGroup.getSample();
if ( sample != null ) {
int oldDepth = depthBySample.get(sample);
depthBySample.put(sample, oldDepth + 1);
int oldDepth = info.depthBySample.get(sample);
info.depthBySample.put(sample, oldDepth + 1);
}
}
}
int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
info.numDeletions = nDeletionReads;
if ( excludeMAPQBelowThis > 0 )
info.numBadMQReads = nBadMAPQReads;
switch ( printStyle ) {
case COMPACT:
out.printf("%s %8d%n", context.getLocation(), nTotalReads);
break;
case DETAILED:
out.printf("%s %8d %8d %8d %8d%n", context.getLocation(), nTotalReads, nCleanReads, nDeletionReads, nBadMAPQReads);
break;
}
printDoCInfo(context.getLocation(), info, false);
if ( byReadGroup ) {
for ( String rg : readGroupNames ) {
out.printf(" %s %8d%n", rg, depthByReadGroup.get(rg));
}
}
if ( bySample ) {
for ( String sample : sampleNames ) {
out.printf(" %s %8d%n", sample, depthBySample.get(sample));
}
}
return nTotalReads;
return info;
}
public boolean isReduceByInterval() {
return true;
}
public Pair<Long, Long> reduceInit() { return new Pair<Long,Long>(0l,0l); }
public DoCInfo reduceInit() { return new DoCInfo(); }
public Pair<Long, Long> reduce(Integer value, Pair<Long, Long> sum) {
long left = value.longValue() + sum.getFirst();
long right = sum.getSecond() + 1l;
return new Pair<Long,Long>(left, right);
public DoCInfo reduce(DoCInfo value, DoCInfo sum) {
sum.totalCoverage += value.totalCoverage;
sum.numDeletions += value.numDeletions;
sum.numBadMQReads += value.numBadMQReads;
if ( byReadGroup ) {
for ( String rg : readGroupNames ) {
int oldDepth = sum.depthByReadGroup.get(rg);
sum.depthByReadGroup.put(rg, oldDepth + value.depthByReadGroup.get(rg));
}
}
if ( bySample ) {
for ( String sample : sampleNames ) {
int oldDepth = sum.depthBySample.get(sample);
sum.depthBySample.put(sample, oldDepth + value.depthBySample.get(sample));
}
}
return sum;
}
public void onTraversalDone(Pair<Long, Long> result) {
out.printf("Average depth of coverage is: %.2f in %d total coverage over %d sites\n",
((double)result.getFirst() / result.getSecond()), result.getFirst(), result.getSecond());
@Override
public void onTraversalDone(List<Pair<GenomeLoc, DoCInfo>> results) {
StringBuilder header = new StringBuilder("\nlocation\ttotal_coverage\taverage_coverage\tcoverage_without_deletions\taverage_coverage_without_deletions");
if ( excludeMAPQBelowThis > 0 ) {
header.append("\tcoverage_atleast_MQ");
header.append(excludeMAPQBelowThis);
header.append("\taverage_coverage_atleast_MQ");
header.append(excludeMAPQBelowThis);
}
if ( byReadGroup ) {
for ( String rg : readGroupNames ) {
header.append("\tcoverage_for_");
header.append(rg);
}
}
if ( bySample ) {
for ( String sample : sampleNames ) {
header.append("\tcoverage_for_");
header.append(sample);
}
}
out.println(header.toString());
for ( Pair<GenomeLoc, DoCInfo> result : results )
printDoCInfo(result.first, result.second, true);
}
private void printDoCInfo(GenomeLoc loc, DoCInfo info, boolean printAverageCoverage) {
StringBuilder sb = new StringBuilder();
sb.append(loc);
sb.append("\t");
sb.append(info.totalCoverage);
sb.append("\t");
if ( printAverageCoverage ) {
sb.append(String.format("%.2f", ((double)info.totalCoverage) / (double)(loc.getStop() - loc.getStart() + 1)));
sb.append("\t");
}
sb.append((info.totalCoverage - info.numDeletions));
if ( printAverageCoverage ) {
sb.append("\t");
sb.append(String.format("%.2f", ((double)(info.totalCoverage - info.numDeletions)) / (double)(loc.getStop() - loc.getStart() + 1)));
}
if ( excludeMAPQBelowThis > 0 ) {
sb.append("\t");
sb.append((info.totalCoverage - info.numBadMQReads));
if ( printAverageCoverage ) {
sb.append("\t");
sb.append(String.format("%.2f", ((double)(info.totalCoverage - info.numBadMQReads)) / (double)(loc.getStop() - loc.getStart() + 1)));
}
}
if ( byReadGroup ) {
for ( String rg : readGroupNames ) {
sb.append("\t");
sb.append(String.format("%8d", info.depthByReadGroup.get(rg)));
}
}
if ( bySample ) {
for ( String sample : sampleNames ) {
sb.append("\t");
sb.append(String.format("%8d", info.depthBySample.get(sample)));
}
}
out.println(sb.toString());
}
public class DoCInfo {
public int totalCoverage = 0;
public int numDeletions = 0;
public int numBadMQReads = 0;
public HashMap<String, Integer> depthByReadGroup = null;
public HashMap<String, Integer> depthBySample = null;
public DoCInfo() {
if ( byReadGroup ) {
depthByReadGroup = new HashMap<String, Integer>();
for ( String readGroupName : readGroupNames )
depthByReadGroup.put(readGroupName, 0);
}
if ( bySample ) {
depthBySample = new HashMap<String, Integer>();
for ( String sample : sampleNames )
depthBySample.put(sample, 0);
}
}
}
}

View File

@ -13,15 +13,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
private static String root = "-L 1:10,164,500-10,164,520 -R /broad/1KG/reference/human_b36_both.fasta -T DepthOfCoverage -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-s COMPACT -minMAPQ 1", "a6b2005e37f48545e9604e0f809cf618");
expectations.put("-s COMPACT", "c3cd44717487ad3c7d6d969583d9bb8c");
expectations.put("-s COMPACT -ed", "73aacc9febea393a4c3760bd6c70a557");
expectations.put("-s NONE", "628201adcef01e8bc3e9cdf96b86003b");
expectations.put("-s NONE -minMAPQ 1", "2425ccbcfdfb9e9a98fcf30c6f2bfcdd");
expectations.put("-s NONE -minMAPQ 100", "09eaf5a4c811e4ac74de5d179a9ffbe7");
expectations.put("-s DETAILED -minMAPQ 1", "9059436a57f8c28ef16ee9b2c7cd3ebb");
expectations.put("-s DETAILED", "fac3bb643f34eb9d90dac0e0358b55ab");
expectations.put("-s DETAILED -ed", "4d991221aa5e206de11878e88c380de1");
expectations.put("-minMAPQ 1", "59c6071105a598e19f460640c35768c6");
expectations.put("-minMAPQ 100", "e997fb5d61eaec21518722b0de90af20");
expectations.put("-bySample", "160ffa185dbfa8b0d2dc57f60f5b1e48");
expectations.put("-byRG", "dd3b4d040df7325dad4760ac6fa5252d");
expectations.put("-minMAPQ 1 -bySample -byRG", "bd2a07ef548b86e82ac6cce534225612");
}
@Test
@ -41,9 +37,9 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
@Test
public void testDepthOfCoverage454() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -s DETAILED -o %s",
"-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -o %s",
1, // just one output file
Arrays.asList("5c94916ec193ca825c681dd0175c6164"));
Arrays.asList("51203ba5ab928449cd01363af0b91510"));
executeTest("testDepthOfCoverage454", spec);
}
}