diff --git a/build.xml b/build.xml
index bb59a6dac..3703c2d4f 100644
--- a/build.xml
+++ b/build.xml
@@ -323,8 +323,8 @@
-
+
diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 888dd35c8..9db0e09e6 100755
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -358,7 +358,8 @@ public class GenomeAnalysisEngine {
argCollection.downsampleCoverage,
!argCollection.unsafe,
filters,
- argCollection.readMaxPileup);
+ argCollection.readMaxPileup,
+ walker.includeReadsWithDeletionAtLoci() );
}
/**
diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java
index 1e09d2c75..3d8459743 100755
--- a/java/src/org/broadinstitute/sting/gatk/Reads.java
+++ b/java/src/org/broadinstitute/sting/gatk/Reads.java
@@ -31,6 +31,17 @@ public class Reads {
private Boolean beSafe = null;
private List supplementalFilters = null;
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
+ private boolean includeReadsWithDeletionAtLoci = false;
+
+
+ /**
+ * Return true if the walker wants to see reads that contain deletions when looking at locus pileups
+ *
+ * @return
+ */
+ public boolean includeReadsWithDeletionAtLoci() {
+ return includeReadsWithDeletionAtLoci;
+ }
/**
* Gets a list of the files acting as sources of reads.
@@ -110,7 +121,8 @@ public class Reads {
Integer downsampleCoverage,
Boolean beSafe,
List supplementalFilters,
- int maximumReadsAtLocus) {
+ int maximumReadsAtLocus,
+ boolean includeReadsWithDeletionAtLoci) {
this.readsFiles = samFiles;
this.validationStringency = strictness;
this.downsamplingFraction = downsampleFraction;
@@ -118,5 +130,6 @@ public class Reads {
this.beSafe = beSafe;
this.supplementalFilters = supplementalFilters;
this.maximumReadsAtLocus = maximumReadsAtLocus;
+ this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
}
}
diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java
index d6a9c179a..8f4fe5bb6 100755
--- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java
+++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java
@@ -142,4 +142,39 @@ public class AlignmentContext {
reads = downsampledReads;
offsets = downsampledOffsets;
}
+
+ /**
+ * Returns only the reads in ac that do not contain spanning deletions of this locus
+ *
+ * @param ac
+ * @return
+ */
+ public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) {
+ return subsetDeletions( ac, true );
+ }
+
+ /**
+ * Returns only the reads in ac that do contain spanning deletions of this locus
+ *
+ * @param ac
+ * @return
+ */
+ public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) {
+ return subsetDeletions( ac, false );
+ }
+
+ private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) {
+ ArrayList reads = new ArrayList(ac.getReads().size());
+ ArrayList offsets = new ArrayList(ac.getReads().size());
+ for ( int i = 0; i < ac.getReads().size(); i++ ) {
+ SAMRecord read = ac.getReads().get(i);
+ int offset = ac.getOffsets().get(i);
+ if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) {
+ reads.add(read);
+ offsets.add(offset);
+ }
+ }
+
+ return new AlignmentContext(ac.getLocation(), reads, offsets);
+ }
}
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
index b974f7e2d..04575a5ca 100755
--- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
@@ -218,13 +218,15 @@ public class LocusIteratorByState extends LocusIterator {
collectPendingReads(readInfo.getMaxReadsAtLocus());
// todo -- performance problem -- should be lazy, really
- LinkedList reads = new LinkedList();
- LinkedList offsets = new LinkedList();
+ ArrayList reads = new ArrayList(readStates.size());
+ ArrayList offsets = new ArrayList(readStates.size());
for ( SAMRecordState state : readStates ) {
- // todo -- enable D operation in alignment contexts
if ( state.getCurrentCigarOperator() != CigarOperator.D ) {
reads.add(state.getRead());
offsets.add(state.getReadOffset());
+ } else if ( readInfo.includeReadsWithDeletionAtLoci() ) {
+ reads.add(state.getRead());
+ offsets.add(-1);
}
}
GenomeLoc loc = getLocation();
@@ -236,7 +238,7 @@ public class LocusIteratorByState extends LocusIterator {
// printState();
//}
- return new AlignmentContext(loc, reads, offsets);
+ return reads.size() == 0 ? next() : new AlignmentContext(loc, reads, offsets);
}
private void collectPendingReads(final int maximumPileupSize) {
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java
index d2fe8df92..06b924062 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java
@@ -29,18 +29,64 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Pair;
+import net.sf.samtools.SAMRecord;
/**
* Display the depth of coverage at a given locus.
*/
public class DepthOfCoverageWalker extends LocusWalker> {
- @Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false)
- public boolean suppressPrinting = false;
+ enum printType {
+ NONE,
+ COMPACT,
+ DETAILED
+ }
+
+ @Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
+ public printType printStyle = printType.COMPACT;
+
+ @Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
+ public boolean excludeDeletionsInCoverage = false;
+
+ @Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
+ public int excludeMAPQBelowThis = -1;
+
+ public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
+
+ public void initialize() {
+ switch ( printStyle ) {
+ case COMPACT:
+ out.printf("locus depth%n");
+ break;
+ case DETAILED:
+ out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
+ break;
+ }
+ }
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
- if ( !suppressPrinting )
- out.printf("%s: %d%n", context.getLocation(), context.getReads().size() );
- return context.getReads().size();
+ int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
+
+ for ( int i = 0; i < context.getReads().size(); i++ ) {
+ SAMRecord read = context.getReads().get(i);
+ int offset = context.getOffsets().get(i);
+
+ if ( read.getMappingQuality() < excludeMAPQBelowThis ) nBadMAPQReads++;
+ else if ( offset == -1 ) nDeletionReads++;
+ else nCleanReads++;
+ }
+
+ int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
+
+ switch ( printStyle ) {
+ case COMPACT:
+ out.printf("%s %8d%n", context.getLocation(), nTotalReads);
+ break;
+ case DETAILED:
+ out.printf("%s %8d %8d %8d %8d%n", context.getLocation(), nTotalReads, nCleanReads, nDeletionReads, nBadMAPQReads);
+ break;
+ }
+
+ return nTotalReads;
}
public boolean isReduceByInterval() {
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java
deleted file mode 100644
index 005c8c70e..000000000
--- a/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers;
-
-import org.broadinstitute.sting.gatk.walkers.LocusWalker;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-
-// Null traversal. For ATK performance measuring.
-// j.maguire 3-7-2009
-
-public class NullWalker extends LocusWalker {
- public void initialize() {
- }
-
- // Do we actually want to operate on the context?
- public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
- return true; // We are keeping all the reads
- }
-
- // Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
- public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
- {
- return 1;
- }
-
- // Given result of map function
- public Integer reduceInit()
- {
- return 0;
- }
- public Integer reduce(Integer value, Integer sum)
- {
- return 0;
- }
-
- public void onTraversalDone() {
- }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java
index ebb6933bc..db4de887b 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java
@@ -43,6 +43,24 @@ public abstract class Walker {
return GenomeAnalysisEngine.instance;
}
+ /**
+ * (conceptual static) method that states whether you want to see reads piling up at a locus
+ * that contain a deletion at the locus.
+ *
+ * ref: ATCTGA
+ * read1: ATCTGA
+ * read2: AT--GA
+ *
+ * Normally, the locus iterator only returns a list of read1 at this locus at position 3, but
+ * if this function returns true, then the system will return (read1, read2) with offsets
+ * of (3, -1). The -1 offset indicates a deletion in the read.
+ *
+ * @return false if you don't want to see deletions, or true if you do
+ */
+ public boolean includeReadsWithDeletionAtLoci() {
+ return false;
+ }
+
public void initialize() { }
/**
diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java
index bd5dbe078..4b76c171f 100755
--- a/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -461,6 +461,15 @@ public class Utils {
return count;
}
+ public static int countOccurrences(T x, List l) {
+ int count = 0;
+ for ( T y : l ) {
+ if ( x.equals(y) ) count++;
+ }
+
+ return count;
+ }
+
public static byte listMaxByte(List quals) {
if ( quals.size() == 0 ) return 0;
byte m = quals.get(0);
diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java
new file mode 100755
index 000000000..ba5cbda0b
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java
@@ -0,0 +1,49 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+import org.broadinstitute.sting.WalkerTest;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Arrays;
+import java.util.List;
+import java.io.File;
+
+public class DepthOfCoverageIntegrationTest extends WalkerTest {
+ private static String root = "-L 1:10,164,500-10,164,520 -R /broad/1KG/reference/human_b36_both.fasta -T DepthOfCoverage -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
+ static HashMap expectations = new HashMap();
+ static {
+ expectations.put("-s COMPACT -minMAPQ 1", "a6b2005e37f48545e9604e0f809cf618");
+ expectations.put("-s COMPACT", "c3cd44717487ad3c7d6d969583d9bb8c");
+ expectations.put("-s COMPACT -ed", "73aacc9febea393a4c3760bd6c70a557");
+ expectations.put("-s NONE", "628201adcef01e8bc3e9cdf96b86003b");
+ expectations.put("-s NONE -minMAPQ 1", "2425ccbcfdfb9e9a98fcf30c6f2bfcdd");
+ expectations.put("-s NONE -minMAPQ 100", "09eaf5a4c811e4ac74de5d179a9ffbe7");
+ expectations.put("-s DETAILED -minMAPQ 1", "9059436a57f8c28ef16ee9b2c7cd3ebb");
+ expectations.put("-s DETAILED", "fac3bb643f34eb9d90dac0e0358b55ab");
+ expectations.put("-s DETAILED -ed", "4d991221aa5e206de11878e88c380de1");
+ }
+
+ @Test
+ public void testDepthOfCoverage1() {
+
+ for ( Map.Entry entry : expectations.entrySet() ) {
+ String extraArgs = entry.getKey();
+ String md5 = entry.getValue();
+
+ WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s",
+ 1, // just one output file
+ Arrays.asList(md5));
+ executeTest("testDepthOfCoverage1", spec);
+ }
+ }
+
+ @Test
+ public void testDepthOfCoverage454() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -s DETAILED -o %s",
+ 1, // just one output file
+ Arrays.asList("5c94916ec193ca825c681dd0175c6164"));
+ executeTest("testDepthOfCoverage454", spec);
+ }
+}
\ No newline at end of file
diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
index ea9d41393..0587e306f 100755
--- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
@@ -49,18 +49,19 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
String md5 = entry.getValue();
String paramsFile = paramsFiles.get(bam);
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
-
- WalkerTestSpec spec = new WalkerTestSpec(
- "-R /broad/1KG/reference/human_b36_both.fasta" +
- " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
- " -T TableRecalibration" +
- " -I " + bam +
- " -L 1:10,000,000-20,000,000" +
- " --outputBam %s" +
- " --params " + paramsFile,
- 1, // just one output file
- Arrays.asList(md5));
- executeTest("testTableRecalibrator1", spec);
+ if ( paramsFile != null ) {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-R /broad/1KG/reference/human_b36_both.fasta" +
+ " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
+ " -T TableRecalibration" +
+ " -I " + bam +
+ " -L 1:10,000,000-20,000,000" +
+ " --outputBam %s" +
+ " --params " + paramsFile,
+ 1, // just one output file
+ Arrays.asList(md5));
+ executeTest("testTableRecalibrator1", spec);
+ }
}
}
}
\ No newline at end of file