diff --git a/build.xml b/build.xml index bb59a6dac..3703c2d4f 100644 --- a/build.xml +++ b/build.xml @@ -323,8 +323,8 @@ - + diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 888dd35c8..9db0e09e6 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -358,7 +358,8 @@ public class GenomeAnalysisEngine { argCollection.downsampleCoverage, !argCollection.unsafe, filters, - argCollection.readMaxPileup); + argCollection.readMaxPileup, + walker.includeReadsWithDeletionAtLoci() ); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index 1e09d2c75..3d8459743 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -31,6 +31,17 @@ public class Reads { private Boolean beSafe = null; private List supplementalFilters = null; private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT + private boolean includeReadsWithDeletionAtLoci = false; + + + /** + * Return true if the walker wants to see reads that contain deletions when looking at locus pileups + * + * @return + */ + public boolean includeReadsWithDeletionAtLoci() { + return includeReadsWithDeletionAtLoci; + } /** * Gets a list of the files acting as sources of reads. @@ -110,7 +121,8 @@ public class Reads { Integer downsampleCoverage, Boolean beSafe, List supplementalFilters, - int maximumReadsAtLocus) { + int maximumReadsAtLocus, + boolean includeReadsWithDeletionAtLoci) { this.readsFiles = samFiles; this.validationStringency = strictness; this.downsamplingFraction = downsampleFraction; @@ -118,5 +130,6 @@ public class Reads { this.beSafe = beSafe; this.supplementalFilters = supplementalFilters; this.maximumReadsAtLocus = maximumReadsAtLocus; + this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; } } diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index d6a9c179a..8f4fe5bb6 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -142,4 +142,39 @@ public class AlignmentContext { reads = downsampledReads; offsets = downsampledOffsets; } + + /** + * Returns only the reads in ac that do not contain spanning deletions of this locus + * + * @param ac + * @return + */ + public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) { + return subsetDeletions( ac, true ); + } + + /** + * Returns only the reads in ac that do contain spanning deletions of this locus + * + * @param ac + * @return + */ + public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) { + return subsetDeletions( ac, false ); + } + + private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) { + ArrayList reads = new ArrayList(ac.getReads().size()); + ArrayList offsets = new ArrayList(ac.getReads().size()); + for ( int i = 0; i < ac.getReads().size(); i++ ) { + SAMRecord read = ac.getReads().get(i); + int offset = ac.getOffsets().get(i); + if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) { + reads.add(read); + offsets.add(offset); + } + } + + return new AlignmentContext(ac.getLocation(), reads, offsets); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index b974f7e2d..04575a5ca 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -218,13 +218,15 @@ public class LocusIteratorByState extends LocusIterator { collectPendingReads(readInfo.getMaxReadsAtLocus()); // todo -- performance problem -- should be lazy, really - LinkedList reads = new LinkedList(); - LinkedList offsets = new LinkedList(); + ArrayList reads = new ArrayList(readStates.size()); + ArrayList offsets = new ArrayList(readStates.size()); for ( SAMRecordState state : readStates ) { - // todo -- enable D operation in alignment contexts if ( state.getCurrentCigarOperator() != CigarOperator.D ) { reads.add(state.getRead()); offsets.add(state.getReadOffset()); + } else if ( readInfo.includeReadsWithDeletionAtLoci() ) { + reads.add(state.getRead()); + offsets.add(-1); } } GenomeLoc loc = getLocation(); @@ -236,7 +238,7 @@ public class LocusIteratorByState extends LocusIterator { // printState(); //} - return new AlignmentContext(loc, reads, offsets); + return reads.size() == 0 ? next() : new AlignmentContext(loc, reads, offsets); } private void collectPendingReads(final int maximumPileupSize) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java index d2fe8df92..06b924062 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java @@ -29,18 +29,64 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Pair; +import net.sf.samtools.SAMRecord; /** * Display the depth of coverage at a given locus. */ public class DepthOfCoverageWalker extends LocusWalker> { - @Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false) - public boolean suppressPrinting = false; + enum printType { + NONE, + COMPACT, + DETAILED + } + + @Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false) + public printType printStyle = printType.COMPACT; + + @Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false) + public boolean excludeDeletionsInCoverage = false; + + @Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false) + public int excludeMAPQBelowThis = -1; + + public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; } + + public void initialize() { + switch ( printStyle ) { + case COMPACT: + out.printf("locus depth%n"); + break; + case DETAILED: + out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n"); + break; + } + } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( !suppressPrinting ) - out.printf("%s: %d%n", context.getLocation(), context.getReads().size() ); - return context.getReads().size(); + int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0; + + for ( int i = 0; i < context.getReads().size(); i++ ) { + SAMRecord read = context.getReads().get(i); + int offset = context.getOffsets().get(i); + + if ( read.getMappingQuality() < excludeMAPQBelowThis ) nBadMAPQReads++; + else if ( offset == -1 ) nDeletionReads++; + else nCleanReads++; + } + + int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads); + + switch ( printStyle ) { + case COMPACT: + out.printf("%s %8d%n", context.getLocation(), nTotalReads); + break; + case DETAILED: + out.printf("%s %8d %8d %8d %8d%n", context.getLocation(), nTotalReads, nCleanReads, nDeletionReads, nBadMAPQReads); + break; + } + + return nTotalReads; } public boolean isReduceByInterval() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java deleted file mode 100644 index 005c8c70e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java +++ /dev/null @@ -1,38 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers; - -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; - -// Null traversal. For ATK performance measuring. -// j.maguire 3-7-2009 - -public class NullWalker extends LocusWalker { - public void initialize() { - } - - // Do we actually want to operate on the context? - public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return true; // We are keeping all the reads - } - - // Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) - { - return 1; - } - - // Given result of map function - public Integer reduceInit() - { - return 0; - } - public Integer reduce(Integer value, Integer sum) - { - return 0; - } - - public void onTraversalDone() { - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index ebb6933bc..db4de887b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -43,6 +43,24 @@ public abstract class Walker { return GenomeAnalysisEngine.instance; } + /** + * (conceptual static) method that states whether you want to see reads piling up at a locus + * that contain a deletion at the locus. + * + * ref: ATCTGA + * read1: ATCTGA + * read2: AT--GA + * + * Normally, the locus iterator only returns a list of read1 at this locus at position 3, but + * if this function returns true, then the system will return (read1, read2) with offsets + * of (3, -1). The -1 offset indicates a deletion in the read. + * + * @return false if you don't want to see deletions, or true if you do + */ + public boolean includeReadsWithDeletionAtLoci() { + return false; + } + public void initialize() { } /** diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index bd5dbe078..4b76c171f 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -461,6 +461,15 @@ public class Utils { return count; } + public static int countOccurrences(T x, List l) { + int count = 0; + for ( T y : l ) { + if ( x.equals(y) ) count++; + } + + return count; + } + public static byte listMaxByte(List quals) { if ( quals.size() == 0 ) return 0; byte m = quals.get(0); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java new file mode 100755 index 000000000..ba5cbda0b --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.Arrays; +import java.util.List; +import java.io.File; + +public class DepthOfCoverageIntegrationTest extends WalkerTest { + private static String root = "-L 1:10,164,500-10,164,520 -R /broad/1KG/reference/human_b36_both.fasta -T DepthOfCoverage -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam"; + static HashMap expectations = new HashMap(); + static { + expectations.put("-s COMPACT -minMAPQ 1", "a6b2005e37f48545e9604e0f809cf618"); + expectations.put("-s COMPACT", "c3cd44717487ad3c7d6d969583d9bb8c"); + expectations.put("-s COMPACT -ed", "73aacc9febea393a4c3760bd6c70a557"); + expectations.put("-s NONE", "628201adcef01e8bc3e9cdf96b86003b"); + expectations.put("-s NONE -minMAPQ 1", "2425ccbcfdfb9e9a98fcf30c6f2bfcdd"); + expectations.put("-s NONE -minMAPQ 100", "09eaf5a4c811e4ac74de5d179a9ffbe7"); + expectations.put("-s DETAILED -minMAPQ 1", "9059436a57f8c28ef16ee9b2c7cd3ebb"); + expectations.put("-s DETAILED", "fac3bb643f34eb9d90dac0e0358b55ab"); + expectations.put("-s DETAILED -ed", "4d991221aa5e206de11878e88c380de1"); + } + + @Test + public void testDepthOfCoverage1() { + + for ( Map.Entry entry : expectations.entrySet() ) { + String extraArgs = entry.getKey(); + String md5 = entry.getValue(); + + WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testDepthOfCoverage1", spec); + } + } + + @Test + public void testDepthOfCoverage454() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -s DETAILED -o %s", + 1, // just one output file + Arrays.asList("5c94916ec193ca825c681dd0175c6164")); + executeTest("testDepthOfCoverage454", spec); + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index ea9d41393..0587e306f 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -49,18 +49,19 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { String md5 = entry.getValue(); String paramsFile = paramsFiles.get(bam); System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - - WalkerTestSpec spec = new WalkerTestSpec( - "-R /broad/1KG/reference/human_b36_both.fasta" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -T TableRecalibration" + - " -I " + bam + - " -L 1:10,000,000-20,000,000" + - " --outputBam %s" + - " --params " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibrator1", spec); + if ( paramsFile != null ) { + WalkerTestSpec spec = new WalkerTestSpec( + "-R /broad/1KG/reference/human_b36_both.fasta" + + " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -T TableRecalibration" + + " -I " + bam + + " -L 1:10,000,000-20,000,000" + + " --outputBam %s" + + " --params " + paramsFile, + 1, // just one output file + Arrays.asList(md5)); + executeTest("testTableRecalibrator1", spec); + } } } } \ No newline at end of file