From 6866a41914c808e9b1d63edbf4860349143cbd14 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 23 Feb 2012 09:45:47 -0500 Subject: [PATCH 01/14] Added functionality in pileups to not only determine whether there's an insertion or deletion following the current position, but to also get the indel length and involved bases - definitely needed for extended event removal, and needed for pool caller indel functionality. --- .../gatk/iterators/LocusIteratorByState.java | 15 ++++++--- .../pileup/AbstractReadBackedPileup.java | 1 + .../pileup/ExtendedEventPileupElement.java | 2 +- .../sting/utils/pileup/PileupElement.java | 31 ++++++++++++++++++- .../utils/pileup/ReadBackedPileupImpl.java | 10 ++++-- 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 6edae3816..b8dd03317 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -175,8 +175,8 @@ public class LocusIteratorByState extends LocusIterator { return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement); } - public CigarOperator peekForwardOnGenome() { - return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ).getOperator(); + public CigarElement peekForwardOnGenome() { + return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ); } public CigarOperator stepForwardOnGenome() { @@ -462,15 +462,19 @@ public class LocusIteratorByState extends LocusIterator { final SAMRecordState state = iterator.next(); // state object with the read/offset information final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator - final CigarOperator nextOp = state.peekForwardOnGenome(); // next cigar operator + final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element + final CigarOperator nextOp = nextElement.getOperator(); final int readOffset = state.getReadOffset(); // the base offset on this read + byte[] insertedBases = Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + nextElement.getLength()); + int nextElementLength = nextElement.getLength(); if (op == CigarOperator.N) // N's are never added to any pileup continue; if (op == CigarOperator.D) { if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so - pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()))); + pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()), + null,nextOp == CigarOperator.D? nextElementLength:-1)); size++; nDeletions++; if (read.getMappingQuality() == 0) @@ -479,7 +483,8 @@ public class LocusIteratorByState extends LocusIterator { } else { if (!filterBaseInRead(read, location.getStart())) { - pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()))); + pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()), + new String(insertedBases),nextElementLength)); size++; if (read.getMappingQuality() == 0) nMQ0Reads++; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 70ad70f43..7c2a67aba 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -205,6 +205,7 @@ public abstract class AbstractReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker); protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip); + protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip, String nextEventBases, int nextEventLength ); // -------------------------------------------------------- // diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index 506442d03..8df0aa0b8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -48,7 +48,7 @@ public class ExtendedEventPileupElement extends PileupElement { public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) { - super(read, offset, type == Type.DELETION, false, false, false); // extended events are slated for removal + super(read, offset, type == Type.DELETION, false, false, false,null,-1); // extended events are slated for removal this.read = read; this.offset = offset; this.eventLength = eventLength; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 9df22700e..022cadbbe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -27,6 +27,10 @@ public class PileupElement implements Comparable { protected final boolean isBeforeDeletion; protected final boolean isBeforeInsertion; protected final boolean isNextToSoftClip; + protected final int eventLength; + protected final String eventBases; // if it is a deletion, we do not have information about the actual deleted bases + // in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases + /** * Creates a new pileup element. @@ -37,12 +41,15 @@ public class PileupElement implements Comparable { * @param isBeforeDeletion whether or not this base is before a deletion * @param isBeforeInsertion whether or not this base is before an insertion * @param isNextToSoftClip whether or not this base is next to a soft clipped base + * @param nextEventBases bases in event in case element comes before insertion or deletion + * @param nextEventLength length of next event in case it's insertion or deletion */ @Requires({ "read != null", "offset >= -1", "offset <= read.getReadLength()"}) - public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isBeforeInsertion, final boolean isNextToSoftClip) { + public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isBeforeInsertion, final boolean isNextToSoftClip, + final String nextEventBases, final int nextEventLength) { if (offset < 0 && isDeletion) throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset"); @@ -52,6 +59,14 @@ public class PileupElement implements Comparable { this.isBeforeDeletion = isBeforeDeletion; this.isBeforeInsertion = isBeforeInsertion; this.isNextToSoftClip = isNextToSoftClip; + if (isBeforeInsertion) + eventBases = nextEventBases; + else + eventBases = null; // ignore argument in any other case + if (isBeforeDeletion || isBeforeInsertion) + eventLength = nextEventLength; + else + eventLength = -1; } public boolean isDeletion() { @@ -104,6 +119,20 @@ public class PileupElement implements Comparable { return getBaseDeletionQual(offset); } + /** + * Returns length of the event (number of inserted or deleted bases + */ + public int getEventLength() { + return eventLength; + } + + /** + * Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read. + */ + public String getEventBases() { + return eventBases; + } + public int getMappingQual() { return read.getMappingQuality(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 7a6ebef21..759d64b2f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -71,7 +71,13 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup Date: Thu, 23 Feb 2012 12:14:48 -0500 Subject: [PATCH 03/14] Added support for breakpoint alleles -- See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic -- Added integrationtest to ensure that we can parse and write out breakpoint example --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 7 +++++-- .../sting/utils/variantcontext/Allele.java | 8 +++++++- .../utils/codecs/vcf/VCFIntegrationTest.java | 16 +++++++++++++++- public/testdata/breakpoint-example.vcf | 6 ++++++ 4 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 public/testdata/breakpoint-example.vcf diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 1bdee802b..3c2ed18e4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -544,12 +544,15 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { } /** - * return true if this is a symbolic allele (e.g. ) otherwise false + * return true if this is a symbolic allele (e.g. ) or + * structural variation breakend (with [ or ]), otherwise false * @param allele the allele to check * @return true if the allele is a symbolic allele, otherwise false */ private static boolean isSymbolicAllele(String allele) { - return (allele != null && allele.startsWith("<") && allele.endsWith(">") && allele.length() > 2); + return (allele != null && allele.length() > 2 && + ((allele.startsWith("<") && allele.endsWith(">")) || + (allele.contains("[") || allele.contains("]")))); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index c3f437f11..52b4109fe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -212,7 +212,13 @@ public class Allele implements Comparable { * @return true if the bases represent a symbolic allele */ public static boolean wouldBeSymbolicAllele(byte[] bases) { - return bases.length > 2 && bases[0] == '<' && bases[bases.length-1] == '>'; + if ( bases.length <= 2 ) + return false; + else { + final String strBases = new String(bases); + return (bases[0] == '<' && bases[bases.length-1] == '>') || + (strBases.contains("[") || strBases.contains("]")); + } } /** diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index c8a0c0ed6..5de6f1417 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -9,7 +9,7 @@ import java.util.List; public class VCFIntegrationTest extends WalkerTest { - @Test + @Test(enabled = false) public void testReadingAndWritingWitHNoChanges() { String md5ofInputVCF = "a990ba187a69ca44cb9bc2bb44d00447"; @@ -25,4 +25,18 @@ public class VCFIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF)); executeTest("Test Variants To VCF from new output", spec2); } + + @Test + // See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic + public void testReadingAndWritingBreakpointAlleles() { + String testVCF = testDir + "breakpoint-example.vcf"; + //String testVCF = validationDataLocation + "multiallelic.vcf"; + + String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; + + String test1 = baseCommand + "-T SelectVariants -V " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("")); + executeTest("Test reading and writing breakpoint VCF", spec1); + } + } diff --git a/public/testdata/breakpoint-example.vcf b/public/testdata/breakpoint-example.vcf new file mode 100644 index 000000000..f015e1721 --- /dev/null +++ b/public/testdata/breakpoint-example.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.1 +#CHROM POS ID REF ALT QUAL FILTER INFO +22 50 bnd_W G G]22:6000] 6 PASS SVTYPE=BND;MATEID=bnd_Y +22 51 bnd_V T ]22:55]T 6 PASS SVTYPE=BND;MATEID=bnd_U +22 55 bnd_U C C[22:51[ 6 PASS SVTYPE=BND;MATEID=bnd_V +22 6000 bnd_Y A A]22:50] 6 PASS SVTYPE=BND;MATEID=bnd_W \ No newline at end of file From ee9a56ad27add21961d7567627a0994ebbf53229 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 23 Feb 2012 18:25:01 -0500 Subject: [PATCH 04/14] Fix subtle bug in the ReduceReads stash reported by Adam * The tailSet generated every time we flush the reads stash is still being affected by subsequent clears because it is just a pointer to the parent element in the original TreeSet. This is dangerous, and there is a weird condition where the clear will affects it. * Fix by creating a new set, given the tailSet instead of trying to do magic with just the pointer. --- .../sting/utils/sam/AlignmentStartWithNoTiesComparator.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java index 02512c8dc..682c76617 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java @@ -36,8 +36,10 @@ public class AlignmentStartWithNoTiesComparator implements Comparator result = cmpContig; else { - if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1; - else result = 1; + if (r1.getAlignmentStart() < r2.getAlignmentStart()) + result = -1; + else + result = 1; } } From c9a4c74f7af270e0040ddfb40d0dcb8a350c175d Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Fri, 24 Feb 2012 10:27:59 -0500 Subject: [PATCH 06/14] a) Bug fixes for last commit related to PileupElements (unit tests are forthcoming). b) Changes needed to make pool caller work in GENOTYPE_GIVEN_ALLELES mode c) Bug fix (yet again) for UG when GENOTYPE_GIVEN_ALLELES and EMIT_ALL_SITES are on, when there's no coverage at site and when input vcf has genotypes: output vcf would still inherit genotypes from input vcf. Now, we just build vc from scratch instead of initializing from input vc. We just take location and alleles from vc --- .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 2 +- .../org/broadinstitute/sting/utils/pileup/PileupElement.java | 3 +++ .../utils/pileup/ReadBackedExtendedEventPileupImpl.java | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 0156890ac..a60cc64f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -253,7 +253,7 @@ public class UnifiedGenotyperEngine { VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles); if ( vcInput == null ) return null; - vc = new VariantContextBuilder(vcInput).source("UG_call").noID().referenceBaseForIndel(ref.getBase()).attributes(new HashMap()).filters(new HashSet()).make(); + vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), vcInput.getAlleles()).make(); } else { // deal with bad/non-standard reference bases if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 022cadbbe..9dbfc52f3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -69,6 +69,9 @@ public class PileupElement implements Comparable { eventLength = -1; } + public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isBeforeInsertion, final boolean isNextToSoftClip) { + this(read,offset, isDeletion, isBeforeDeletion, isBeforeInsertion, isNextToSoftClip, null, -1); + } public boolean isDeletion() { return isDeletion; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java index 357195daa..e547534dd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -99,6 +99,11 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip) { throw new UnsupportedOperationException("Not enough information provided to create a new pileup element"); } + @Override + protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, + boolean isNextToSoftClip,String nextEventBases, int nextEventLength) { + throw new UnsupportedOperationException("Not enough information provided to create a new pileup element"); + } /** From 50de1a3eabffecb07f4219396a81d617cb4d56e3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 25 Feb 2012 11:26:36 -0500 Subject: [PATCH 12/14] Fixing bad VCFIntegration tests -- Left disabled a test that should have been enabled -- Didn't add the md5 to the test I actually added -- Now VCFIntegrationTests should be working! --- .../sting/utils/codecs/vcf/VCFIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 5de6f1417..ca5fcf419 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -9,7 +9,7 @@ import java.util.List; public class VCFIntegrationTest extends WalkerTest { - @Test(enabled = false) + @Test(enabled = true) public void testReadingAndWritingWitHNoChanges() { String md5ofInputVCF = "a990ba187a69ca44cb9bc2bb44d00447"; @@ -35,7 +35,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("76075307afd26b4db6234795d9fb3c2f")); executeTest("Test reading and writing breakpoint VCF", spec1); } From c8a06e53c17cb4a4c8d2e76f2ae24aa32ccbd6ae Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 25 Feb 2012 11:32:50 -0500 Subject: [PATCH 13/14] DoC now properly handles reference N bases + misc. additional cleanups -- DoC now by default ignores bases with reference Ns, so these are not included in the coverage calculations at any stage. -- Added option --includeRefNSites that will include them in the calculation -- Added integration tests that ensures the per base tables (and so all subsequent calculations) work with and without reference N bases included -- Reorganized command line options, tagging advanced options with @Advanced --- .../coverage/DepthOfCoverageWalker.java | 107 ++++++++++++------ .../DepthOfCoverageIntegrationTest.java | 10 ++ 2 files changed, 80 insertions(+), 37 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index cbbb3d43f..7d1858a63 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -119,21 +120,6 @@ public class DepthOfCoverageWalker extends LocusWalker out; - /** - * Sets the low-coverage cutoff for granular binning. All loci with depth < START are counted in the first bin. - */ - @Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false) - int start = 1; - /** - * Sets the high-coverage cutoff for granular binning. All loci with depth > END are counted in the last bin. - */ - @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) - int stop = 500; - /** - * Sets the number of bins for granular binning - */ - @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) - int nBins = 499; @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false) int minMappingQuality = -1; @Argument(fullName = "maxMappingQuality", doc = "Maximum mapping quality of reads to count towards depth. Defaults to 2^31-1 (Integer.MAX_VALUE).", required = false) @@ -142,16 +128,19 @@ public class DepthOfCoverageWalker extends LocusWalker END are counted in the last bin. + */ + @Advanced + @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) + int stop = 500; + /** + * Sets the number of bins for granular binning + */ + @Advanced + @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) + int nBins = 499; + /** * Do not tabulate the sample summary statistics (total, mean, median, quartile coverage per sample) */ @@ -174,27 +207,22 @@ public class DepthOfCoverageWalker extends LocusWalker partitionTypes = EnumSet.of(DoCOutputType.Partition.sample); + /** * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output. */ + @Advanced @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) boolean includeDeletions = false; + + @Advanced @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) boolean ignoreDeletionSites = false; - /** - * Path to the RefSeq file for use in aggregating coverage statistics over genes - */ - @Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false) - File refSeqGeneList = null; - /** - * The format of the output file - */ - @Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false) - String outputFormat = "rtable"; /** * A coverage threshold for summarizing (e.g. % bases >= CT for each sample) */ + @Advanced @Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false) int[] coverageThresholds = {15}; @@ -334,24 +362,29 @@ public class DepthOfCoverageWalker extends LocusWalker> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (includeRefNBases || BaseUtils.isRegularBase(ref.getBase())) { + if ( ! omitDepthOutput ) { + getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary).printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives) + //System.out.printf("\t[log]\t%s",ref.getLocus()); + } - if ( ! omitDepthOutput ) { - getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary).printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives) - //System.out.printf("\t[log]\t%s",ref.getLocus()); + return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,partitionTypes); + } else { + return null; } - - return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,partitionTypes); } public CoveragePartitioner reduce(Map> thisMap, CoveragePartitioner prevReduce) { - if ( ! omitDepthOutput ) { - //checkOrder(prevReduce); // tests prevReduce.getIdentifiersByType().get(t) against the initialized header order - printDepths(getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary),thisMap,prevReduce.getIdentifiersByType()); - // this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without - // turning on omit - } + if ( thisMap != null ) { // skip sites we didn't want to include in the calculation (ref Ns) + if ( ! omitDepthOutput ) { + //checkOrder(prevReduce); // tests prevReduce.getIdentifiersByType().get(t) against the initialized header order + printDepths(getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary),thisMap,prevReduce.getIdentifiersByType()); + // this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without + // turning on omit + } - prevReduce.update(thisMap); // note that in "useBoth" cases, this method alters the thisMap object + prevReduce.update(thisMap); // note that in "useBoth" cases, this method alters the thisMap object + } return prevReduce; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java index 1c58346b4..6f1370008 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -94,4 +94,14 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { execute("testNoCoverageDueToFiltering",spec); } + + public void testRefNHandling(boolean includeNs, final String md5) { + String command = "-R " + b37KGReference + " -L 20:26,319,565-26,319,575 -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -T DepthOfCoverage -baseCounts --omitIntervalStatistics --omitLocusTable --omitPerSampleStats -o %s"; + if ( includeNs ) command += " --includeRefNSites"; + WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList(md5)); + executeTest("Testing DoC " + (includeNs ? "with" : "without") + " reference Ns", spec); + } + + @Test public void testRefNWithNs() { testRefNHandling(true, "24cd2da2e4323ce6fd76217ba6dc2834"); } + @Test public void testRefNWithoutNs() { testRefNHandling(false, "4fc0f1a2e968f777d693abcefd4fb7af"); } } From dea35943d17830496645dcab9cd59a063b258e34 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sat, 25 Feb 2012 13:57:28 -0500 Subject: [PATCH 14/14] a) Bug fix in calling new functions that give indel bases and length from regular pileup in LocusIteratorByState, b) Added unit test to cover these. --- .../gatk/iterators/LocusIteratorByState.java | 8 ++- .../LocusIteratorByStateUnitTest.java | 50 +++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index b8dd03317..a47c61d0b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -465,7 +465,7 @@ public class LocusIteratorByState extends LocusIterator { final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element final CigarOperator nextOp = nextElement.getOperator(); final int readOffset = state.getReadOffset(); // the base offset on this read - byte[] insertedBases = Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + nextElement.getLength()); + int nextElementLength = nextElement.getLength(); if (op == CigarOperator.N) // N's are never added to any pileup @@ -483,8 +483,12 @@ public class LocusIteratorByState extends LocusIterator { } else { if (!filterBaseInRead(read, location.getStart())) { + String insertedBaseString = null; + if (nextOp == CigarOperator.I) { + insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + nextElement.getLength())); + } pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()), - new String(insertedBases),nextElementLength)); + insertedBaseString,nextElementLength)); size++; if (read.getMappingQuality() == 0) nMQ0Reads++; diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index 04e11db54..7282d6c48 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -6,6 +6,7 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; @@ -85,6 +86,55 @@ public class LocusIteratorByStateUnitTest extends BaseTest { Assert.assertTrue(foundExtendedEventPileup,"Extended event pileup not found"); } + @Test + public void testIndelsInRegularPileup() { + final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; + final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'}; + + // create a test version of the Reads object + ReadProperties readAttributes = createTestReadProperties(); + JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); + + SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); + before.setReadBases(bases); + before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); + before.setCigarString("10M"); + + SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10); + during.setReadBases(indelBases); + during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); + during.setCigarString("4M2I6M"); + + SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10); + after.setReadBases(bases); + after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); + after.setCigarString("10M"); + + List reads = Arrays.asList(before,during,after); + + // create the iterator by state with the fake reads and fake records + li = makeLTBS(reads,readAttributes); + + boolean foundIndel = false; + while (li.hasNext()) { + AlignmentContext context = li.next(); + if(!context.hasBasePileup()) + continue; + + ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10); + for (PileupElement p : pileup) { + if (p.isBeforeInsertion()) { + foundIndel = true; + Assert.assertEquals(p.getEventLength(), 2, "Wrong event length"); + Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect"); + break; + } + } + + } + + Assert.assertTrue(foundIndel,"Indel in pileup not found"); + } /** * Right now, the GATK's extended event pileup DOES NOT include reads which stop immediately before an insertion