Refactored DoCWalker to output in a more helpful and usable style. It now outputs in tabular format with 2 different sections: per locus and then per interval.
I am now at a point where I can merge the functionality from other coverage walkers into this one. Thanks to Andrew for input. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2239 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d7e4cd4c82
commit
a88202c3f6
|
|
@ -28,7 +28,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ import java.util.*;
|
|||
* Display the depth of coverage at a given locus.
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>> {
|
||||
public class DepthOfCoverageWalker extends LocusWalker<DepthOfCoverageWalker.DoCInfo, DepthOfCoverageWalker.DoCInfo> {
|
||||
enum printType {
|
||||
NONE,
|
||||
COMPACT,
|
||||
|
|
@ -48,10 +48,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
|
||||
protected printType printStyle = printType.COMPACT;
|
||||
|
||||
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
|
||||
protected boolean excludeDeletionsInCoverage = false;
|
||||
|
||||
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
|
||||
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will also list read counts with MAPQ >= this value at a locus in coverage",required=false)
|
||||
protected int excludeMAPQBelowThis = -1;
|
||||
|
||||
@Argument(fullName="byReadGroup", shortName="byRG", doc="List read depths for each read group")
|
||||
|
|
@ -60,23 +57,13 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
@Argument(fullName="bySample", shortName="bySample", doc="List read depths for each sample")
|
||||
protected boolean bySample = false;
|
||||
|
||||
|
||||
// keep track of the read group and sample names
|
||||
private TreeSet<String> readGroupNames = new TreeSet<String>();
|
||||
private TreeSet<String> sampleNames = new TreeSet<String>();
|
||||
|
||||
|
||||
|
||||
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
public void initialize() {
|
||||
switch ( printStyle ) {
|
||||
case COMPACT:
|
||||
out.printf("locus depth%n");
|
||||
break;
|
||||
case DETAILED:
|
||||
out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
|
||||
break;
|
||||
}
|
||||
|
||||
if ( byReadGroup ) {
|
||||
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
|
||||
|
|
@ -92,24 +79,41 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
sampleNames.add(sample);
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder header = new StringBuilder("location\ttotal_coverage\tcoverage_without_deletions");
|
||||
if ( excludeMAPQBelowThis > 0 ) {
|
||||
header.append("\tcoverage_atleast_MQ");
|
||||
header.append(excludeMAPQBelowThis);
|
||||
}
|
||||
if ( byReadGroup ) {
|
||||
for ( String rg : readGroupNames ) {
|
||||
header.append("\tcoverage_for_");
|
||||
header.append(rg);
|
||||
}
|
||||
}
|
||||
if ( bySample ) {
|
||||
for ( String sample : sampleNames ) {
|
||||
header.append("\tcoverage_for_");
|
||||
header.append(sample);
|
||||
}
|
||||
}
|
||||
out.println(header.toString());
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
|
||||
|
||||
HashMap<String, Integer> depthByReadGroup = new HashMap<String, Integer>();
|
||||
for ( String readGroupName : readGroupNames )
|
||||
depthByReadGroup.put(readGroupName, 0);
|
||||
HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
|
||||
for ( String sample : sampleNames )
|
||||
depthBySample.put(sample, 0);
|
||||
public DoCInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
|
||||
DoCInfo info = new DoCInfo();
|
||||
info.totalCoverage = pileup.size();
|
||||
|
||||
int nBadMAPQReads = 0, nDeletionReads = 0;
|
||||
for ( PileupElement p : pileup ) {
|
||||
|
||||
if ( p.getRead().getMappingQuality() < excludeMAPQBelowThis ) nBadMAPQReads++;
|
||||
else if ( p.isDeletion() ) nDeletionReads++;
|
||||
else nCleanReads++;
|
||||
if ( excludeMAPQBelowThis > 0 && p.getRead().getMappingQuality() < excludeMAPQBelowThis )
|
||||
nBadMAPQReads++;
|
||||
else if ( p.isDeletion() )
|
||||
nDeletionReads++;
|
||||
|
||||
SAMReadGroupRecord readGroup = p.getRead().getReadGroup();
|
||||
if ( readGroup == null )
|
||||
|
|
@ -117,59 +121,143 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
|
||||
if ( byReadGroup ) {
|
||||
String readGroupName = readGroup.getReadGroupId();
|
||||
int oldDepth = depthByReadGroup.get(readGroupName);
|
||||
depthByReadGroup.put(readGroupName, oldDepth + 1);
|
||||
int oldDepth = info.depthByReadGroup.get(readGroupName);
|
||||
info.depthByReadGroup.put(readGroupName, oldDepth + 1);
|
||||
}
|
||||
|
||||
if ( bySample ) {
|
||||
String sample = readGroup.getSample();
|
||||
if ( sample != null ) {
|
||||
int oldDepth = depthBySample.get(sample);
|
||||
depthBySample.put(sample, oldDepth + 1);
|
||||
int oldDepth = info.depthBySample.get(sample);
|
||||
info.depthBySample.put(sample, oldDepth + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
|
||||
info.numDeletions = nDeletionReads;
|
||||
if ( excludeMAPQBelowThis > 0 )
|
||||
info.numBadMQReads = nBadMAPQReads;
|
||||
|
||||
switch ( printStyle ) {
|
||||
case COMPACT:
|
||||
out.printf("%s %8d%n", context.getLocation(), nTotalReads);
|
||||
break;
|
||||
case DETAILED:
|
||||
out.printf("%s %8d %8d %8d %8d%n", context.getLocation(), nTotalReads, nCleanReads, nDeletionReads, nBadMAPQReads);
|
||||
break;
|
||||
}
|
||||
printDoCInfo(context.getLocation(), info, false);
|
||||
|
||||
if ( byReadGroup ) {
|
||||
for ( String rg : readGroupNames ) {
|
||||
out.printf(" %s %8d%n", rg, depthByReadGroup.get(rg));
|
||||
}
|
||||
}
|
||||
|
||||
if ( bySample ) {
|
||||
for ( String sample : sampleNames ) {
|
||||
out.printf(" %s %8d%n", sample, depthBySample.get(sample));
|
||||
}
|
||||
}
|
||||
|
||||
return nTotalReads;
|
||||
return info;
|
||||
}
|
||||
|
||||
public boolean isReduceByInterval() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public Pair<Long, Long> reduceInit() { return new Pair<Long,Long>(0l,0l); }
|
||||
public DoCInfo reduceInit() { return new DoCInfo(); }
|
||||
|
||||
public Pair<Long, Long> reduce(Integer value, Pair<Long, Long> sum) {
|
||||
long left = value.longValue() + sum.getFirst();
|
||||
long right = sum.getSecond() + 1l;
|
||||
return new Pair<Long,Long>(left, right);
|
||||
public DoCInfo reduce(DoCInfo value, DoCInfo sum) {
|
||||
sum.totalCoverage += value.totalCoverage;
|
||||
sum.numDeletions += value.numDeletions;
|
||||
sum.numBadMQReads += value.numBadMQReads;
|
||||
if ( byReadGroup ) {
|
||||
for ( String rg : readGroupNames ) {
|
||||
int oldDepth = sum.depthByReadGroup.get(rg);
|
||||
sum.depthByReadGroup.put(rg, oldDepth + value.depthByReadGroup.get(rg));
|
||||
}
|
||||
}
|
||||
if ( bySample ) {
|
||||
for ( String sample : sampleNames ) {
|
||||
int oldDepth = sum.depthBySample.get(sample);
|
||||
sum.depthBySample.put(sample, oldDepth + value.depthBySample.get(sample));
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Pair<Long, Long> result) {
|
||||
out.printf("Average depth of coverage is: %.2f in %d total coverage over %d sites\n",
|
||||
((double)result.getFirst() / result.getSecond()), result.getFirst(), result.getSecond());
|
||||
@Override
|
||||
public void onTraversalDone(List<Pair<GenomeLoc, DoCInfo>> results) {
|
||||
|
||||
StringBuilder header = new StringBuilder("\nlocation\ttotal_coverage\taverage_coverage\tcoverage_without_deletions\taverage_coverage_without_deletions");
|
||||
if ( excludeMAPQBelowThis > 0 ) {
|
||||
header.append("\tcoverage_atleast_MQ");
|
||||
header.append(excludeMAPQBelowThis);
|
||||
header.append("\taverage_coverage_atleast_MQ");
|
||||
header.append(excludeMAPQBelowThis);
|
||||
}
|
||||
if ( byReadGroup ) {
|
||||
for ( String rg : readGroupNames ) {
|
||||
header.append("\tcoverage_for_");
|
||||
header.append(rg);
|
||||
}
|
||||
}
|
||||
if ( bySample ) {
|
||||
for ( String sample : sampleNames ) {
|
||||
header.append("\tcoverage_for_");
|
||||
header.append(sample);
|
||||
}
|
||||
}
|
||||
out.println(header.toString());
|
||||
|
||||
for ( Pair<GenomeLoc, DoCInfo> result : results )
|
||||
printDoCInfo(result.first, result.second, true);
|
||||
}
|
||||
|
||||
private void printDoCInfo(GenomeLoc loc, DoCInfo info, boolean printAverageCoverage) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(loc);
|
||||
sb.append("\t");
|
||||
sb.append(info.totalCoverage);
|
||||
sb.append("\t");
|
||||
if ( printAverageCoverage ) {
|
||||
sb.append(String.format("%.2f", ((double)info.totalCoverage) / (double)(loc.getStop() - loc.getStart() + 1)));
|
||||
sb.append("\t");
|
||||
}
|
||||
sb.append((info.totalCoverage - info.numDeletions));
|
||||
if ( printAverageCoverage ) {
|
||||
sb.append("\t");
|
||||
sb.append(String.format("%.2f", ((double)(info.totalCoverage - info.numDeletions)) / (double)(loc.getStop() - loc.getStart() + 1)));
|
||||
}
|
||||
if ( excludeMAPQBelowThis > 0 ) {
|
||||
sb.append("\t");
|
||||
sb.append((info.totalCoverage - info.numBadMQReads));
|
||||
if ( printAverageCoverage ) {
|
||||
sb.append("\t");
|
||||
sb.append(String.format("%.2f", ((double)(info.totalCoverage - info.numBadMQReads)) / (double)(loc.getStop() - loc.getStart() + 1)));
|
||||
}
|
||||
}
|
||||
if ( byReadGroup ) {
|
||||
for ( String rg : readGroupNames ) {
|
||||
sb.append("\t");
|
||||
sb.append(String.format("%8d", info.depthByReadGroup.get(rg)));
|
||||
}
|
||||
}
|
||||
|
||||
if ( bySample ) {
|
||||
for ( String sample : sampleNames ) {
|
||||
sb.append("\t");
|
||||
sb.append(String.format("%8d", info.depthBySample.get(sample)));
|
||||
}
|
||||
}
|
||||
|
||||
out.println(sb.toString());
|
||||
}
|
||||
|
||||
public class DoCInfo {
|
||||
public int totalCoverage = 0;
|
||||
public int numDeletions = 0;
|
||||
public int numBadMQReads = 0;
|
||||
|
||||
public HashMap<String, Integer> depthByReadGroup = null;
|
||||
public HashMap<String, Integer> depthBySample = null;
|
||||
|
||||
public DoCInfo() {
|
||||
if ( byReadGroup ) {
|
||||
depthByReadGroup = new HashMap<String, Integer>();
|
||||
for ( String readGroupName : readGroupNames )
|
||||
depthByReadGroup.put(readGroupName, 0);
|
||||
}
|
||||
if ( bySample ) {
|
||||
depthBySample = new HashMap<String, Integer>();
|
||||
for ( String sample : sampleNames )
|
||||
depthBySample.put(sample, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,15 +13,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
|||
private static String root = "-L 1:10,164,500-10,164,520 -R /broad/1KG/reference/human_b36_both.fasta -T DepthOfCoverage -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
|
||||
static HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
static {
|
||||
expectations.put("-s COMPACT -minMAPQ 1", "a6b2005e37f48545e9604e0f809cf618");
|
||||
expectations.put("-s COMPACT", "c3cd44717487ad3c7d6d969583d9bb8c");
|
||||
expectations.put("-s COMPACT -ed", "73aacc9febea393a4c3760bd6c70a557");
|
||||
expectations.put("-s NONE", "628201adcef01e8bc3e9cdf96b86003b");
|
||||
expectations.put("-s NONE -minMAPQ 1", "2425ccbcfdfb9e9a98fcf30c6f2bfcdd");
|
||||
expectations.put("-s NONE -minMAPQ 100", "09eaf5a4c811e4ac74de5d179a9ffbe7");
|
||||
expectations.put("-s DETAILED -minMAPQ 1", "9059436a57f8c28ef16ee9b2c7cd3ebb");
|
||||
expectations.put("-s DETAILED", "fac3bb643f34eb9d90dac0e0358b55ab");
|
||||
expectations.put("-s DETAILED -ed", "4d991221aa5e206de11878e88c380de1");
|
||||
expectations.put("-minMAPQ 1", "59c6071105a598e19f460640c35768c6");
|
||||
expectations.put("-minMAPQ 100", "e997fb5d61eaec21518722b0de90af20");
|
||||
expectations.put("-bySample", "160ffa185dbfa8b0d2dc57f60f5b1e48");
|
||||
expectations.put("-byRG", "dd3b4d040df7325dad4760ac6fa5252d");
|
||||
expectations.put("-minMAPQ 1 -bySample -byRG", "bd2a07ef548b86e82ac6cce534225612");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -41,9 +37,9 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testDepthOfCoverage454() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -s DETAILED -o %s",
|
||||
"-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList("5c94916ec193ca825c681dd0175c6164"));
|
||||
Arrays.asList("51203ba5ab928449cd01363af0b91510"));
|
||||
executeTest("testDepthOfCoverage454", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue