From 96034aee0e53e6475b33e82f583097c7eaa09bc5 Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 16 Jul 2010 18:57:58 +0000 Subject: [PATCH] Cleanup for Steve Hershman's issue. In the midst of doing this, I discovered that the semantics for which reads are in an extended event pileup are not clear at this point. Eric and I have planned a future clarification for this and the two of us will discuss who will implement this clarification and when it'll happen. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3809 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/contexts/AlignmentContext.java | 48 ++----- .../gatk/iterators/LocusIteratorByState.java | 9 ++ .../pileup/AbstractReadBackedPileup.java | 5 +- .../pileup/ExtendedEventPileupElement.java | 24 ++++ .../sting/utils/pileup/PileupElement.java | 24 +++- .../LocusIteratorByStateUnitTest.java | 128 ++++++++++++++---- 6 files changed, 177 insertions(+), 61 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 3651433ab..c5e2109d8 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -47,7 +47,6 @@ import java.util.*; public class AlignmentContext { protected GenomeLoc loc = null; protected ReadBackedPileup basePileup = null; - protected ReadBackedExtendedEventPileup extendedPileup = null; private LocusOverflowTracker tracker; /** @@ -89,10 +88,6 @@ public class AlignmentContext { this(loc, basePileup, 0); } - public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup) { - this(loc, extendedPileup, 0); - } - public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, long skippedBases ) { if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null"); if ( basePileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null"); @@ -103,16 +98,6 @@ public class AlignmentContext { this.skippedBases = skippedBases; } - public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup, long skippedBases ) { - if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null"); - if ( extendedPileup == null ) throw new StingException("BUG: ReadBackedExtendedEventPileup in Alignment context is null"); - if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context"); - - this.loc = loc; - this.extendedPileup = extendedPileup; - this.skippedBases = skippedBases; - } - /** Returns base pileup over the current genomic location. Deprectated. Use getBasePileup() to make your intentions * clear. * @return @@ -130,19 +115,24 @@ public class AlignmentContext { * only base pileup. * @return */ - public ReadBackedExtendedEventPileup getExtendedEventPileup() { return extendedPileup; } + public ReadBackedExtendedEventPileup getExtendedEventPileup() { + if(!hasExtendedEventPileup()) + throw new StingException("No extended event pileup is present."); + return (ReadBackedExtendedEventPileup)basePileup; + } - /** Returns true if this alignment context keeps base pileup over the current genomic location. - * + /** + * Returns true if this alignment context keeps base pileup over the current genomic location. + * TODO: Syntax of AlignmentContext uses hasBasePileup() / hasExtendedEventPileup() as an enumeration mechanism. Change this to a more sensible interface. * @return */ - public boolean hasBasePileup() { return basePileup != null; } + public boolean hasBasePileup() { return !(basePileup instanceof ReadBackedExtendedEventPileup); } /** Returns true if this alignment context keeps extended event (indel) pileup over the current genomic location. * * @return */ - public boolean hasExtendedEventPileup() { return extendedPileup != null; } + public boolean hasExtendedEventPileup() { return basePileup instanceof ReadBackedExtendedEventPileup; } /** * get all of the reads within this context @@ -151,7 +141,7 @@ public class AlignmentContext { */ @Deprecated //todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory - public List getReads() { return ( basePileup == null ? extendedPileup.getReads() : basePileup.getReads() ); } + public List getReads() { return ( basePileup.getReads() ); } /** * Are there any reads associated with this locus? @@ -159,7 +149,7 @@ public class AlignmentContext { * @return */ public boolean hasReads() { - return ( (basePileup != null && basePileup.size() > 0) || (extendedPileup != null && extendedPileup.size() > 0) ) ; + return basePileup != null && basePileup.size() > 0 ; } /** @@ -167,9 +157,7 @@ public class AlignmentContext { * @return */ public int size() { - if ( basePileup == null && extendedPileup == null ) return 0; - //todo: both pileups can be non-nulls in theory (but not in current usage), what if sizes differ? - return extendedPileup == null ? basePileup.size() : extendedPileup.size(); + return basePileup.size(); } /** @@ -179,8 +167,7 @@ public class AlignmentContext { */ @Deprecated public List getOffsets() { - //todo: both pileups can be non-nulls in theory (but not in current usage), what offsets should we return? - return extendedPileup == null ? basePileup.getOffsets() : extendedPileup.getOffsets(); + return basePileup.getOffsets(); } public String getContig() { return getLocation().getContig(); } @@ -188,12 +175,7 @@ public class AlignmentContext { public GenomeLoc getLocation() { return loc; } public void downsampleToCoverage(int coverage) { - if ( basePileup != null ) { - basePileup = basePileup.getDownsampledPileup(coverage); - } - if ( extendedPileup != null ) { - extendedPileup = extendedPileup.getDownsampledPileup(coverage); - } + basePileup = basePileup.getDownsampledPileup(coverage); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 3b452d703..7f71303f2 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -393,6 +393,15 @@ public class LocusIteratorByState extends LocusIterator { state.getEventBases()) ); } else { + // HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position + // and add in extra reads that start after this indel. Skip these reads. + // My belief at this moment after empirically looking at read->ref alignment is that, in a cigar string + // like 1I76M, the first insertion is between alignment start-1 and alignment start, so we shouldn't be + // filtering these out. + // TODO: UPDATE! Eric tells me that we *might* want reads adjacent to the pileup in the pileup. Strike this block. + //if(state.getRead().getAlignmentStart() > loc.getStart()) + // continue; + if ( state.getCurrentCigarOperator() != CigarOperator.N ) { // this read has no indel associated with the previous position on the ref; // we count this read in only if it has actual bases, not N span... diff --git a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index a4aa09f79..39cb534f8 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -439,7 +439,10 @@ public abstract class AbstractReadBackedPileup filteredTracker = new UnifiedPileupElementTracker(); for ( PE p : pileupElementTracker ) { - if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) { + if ( p.getRead().getMappingQuality() >= minMapQ && + (p.isDeletion() || + ((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement)p).getType() == ExtendedEventPileupElement.Type.NOEVENT) || + p.getQual() >= minBaseQ) ) { filteredTracker.add(p); } } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index 21b7aad11..15583da7a 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.pileup; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; import java.util.Arrays; @@ -93,6 +94,29 @@ public class ExtendedEventPileupElement extends PileupElement { return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength); } + // The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with + // a valid base quality. The following code attempts to compensate for that.' + + @Override + public byte getBase() { + return getBase(offset >= 0 ? offset : offset+eventLength); + } + + @Override + public int getBaseIndex() { + return getBaseIndex(offset >= 0 ? offset : offset+eventLength); + } + + @Override + public byte getSecondBase() { + return getSecondBase(offset >= 0 ? offset : offset+eventLength); + } + + @Override + public byte getQual() { + return getQual(offset >= 0 ? offset : offset+eventLength); + } + /** Returns length of the event (number of inserted or deleted bases */ public int getEventLength() { return eventLength; } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index e5f600aa1..5d563c84c 100755 --- a/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -36,19 +36,19 @@ public class PileupElement { public int getOffset() { return offset; } public byte getBase() { - return isDeletion() ? DELETION_BASE : read.getReadBases()[offset]; + return getBase(offset); } public int getBaseIndex() { - return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]); + return getBaseIndex(offset); } public byte getSecondBase() { - return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset); + return getSecondBase(offset); } public byte getQual() { - return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; + return getQual(offset); } public int getMappingQual() { return read.getMappingQuality(); } @@ -56,4 +56,20 @@ public class PileupElement { public String toString() { return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual()); } + + protected byte getBase(final int offset) { + return isDeletion() ? DELETION_BASE : read.getReadBases()[offset]; + } + + protected int getBaseIndex(final int offset) { + return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]); + } + + protected byte getSecondBase(final int offset) { + return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset); + } + + protected byte getQual(final int offset) { + return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; + } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index 804d26f09..195f97295 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -11,6 +11,8 @@ import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.junit.Before; import org.junit.BeforeClass; @@ -20,6 +22,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Arrays; /** * testing of the LocusIteratorByState @@ -36,6 +39,102 @@ public class LocusIteratorByStateUnitTest extends BaseTest { GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); } + @Test + public void testIndelBaseQualityFiltering() { + final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; + + // create a test version of the Reads object + Reads readAttributes = new Reads(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true); + + SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); + before.setReadBases(bases); + before.setBaseQualities(new byte[] {20,20,20,20,0,20,20,20,20,20}); + before.setCigarString("10M"); + + SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10); + during.setReadBases(bases); + during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20}); + during.setCigarString("4M1I6M"); + + SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10); + after.setReadBases(bases); + after.setBaseQualities(new byte[] {20,20,0,20,20,20,20,20,20,20}); + after.setCigarString("10M"); + + List reads = Arrays.asList(before,during,after); + + // create the iterator by state with the fake reads and fake records + li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes); + + boolean foundExtendedEventPileup = false; + while (li.hasNext()) { + AlignmentContext context = li.next(); + if(!context.hasExtendedEventPileup()) + continue; + + ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup().getBaseFilteredPileup(10); + Assert.assertEquals("Extended event pileup at wrong location",5,pileup.getLocation().getStart()); + Assert.assertEquals("Pileup size is incorrect",3,pileup.size()); + + foundExtendedEventPileup = true; + } + + Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup); + } + + /** + * Right now, the GATK's extended event pileup DOES NOT include reads which stop immediately before an insertion + * but DOES include reads which stop immediately after an insertion. This is almost certainly WRONG. Eric is + * figuring out the right way to handle this; in the meantime, adding this test to monitor that: + * A) the behavior is consistent + * B) so that we do end up with an automated test for this case when the model is fixed. + */ + @Test + public void testIndelPileupContainsAbuttingReads() { + final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; + final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; + + // create a test version of the Reads object + Reads readAttributes = new Reads(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true); + + SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); + before.setReadBases(bases); + before.setBaseQualities(quals); + before.setCigarString("10M"); + + SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,6,10); + during.setReadBases(bases); + during.setBaseQualities(quals); + during.setCigarString("5M1I5M"); + + SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,11,10); + after.setReadBases(bases); + after.setBaseQualities(quals); + after.setCigarString("10M"); + + List reads = Arrays.asList(before,during,after); + + // create the iterator by state with the fake reads and fake records + li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes); + + boolean foundExtendedEventPileup = false; + while (li.hasNext()) { + AlignmentContext context = li.next(); + if(!context.hasExtendedEventPileup()) + continue; + + Assert.assertEquals("Extended event pileup at wrong location",10,context.getLocation().getStart()); + Assert.assertEquals("Pileup size is incorrect",2,context.size()); + Assert.assertEquals("Read in pileup is incorrect",during,context.getExtendedEventPileup().getReads().get(0)); + Assert.assertEquals("Read in pileup is incorrect",after,context.getExtendedEventPileup().getReads().get(1)); + + foundExtendedEventPileup = true; + } + + Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup); + } @Test public void testBasicWarnings() { @@ -45,11 +144,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest { records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20)); // create a test version of the Reads object - TestReads reads = new TestReads(new ArrayList()); - reads.setMaxPileupSize(MAX_READS); + Reads reads = new Reads(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS); // create the iterator by state with the fake reads and fake records - li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); + li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); // inject the testing version of the locus iterator watcher li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS)); @@ -71,11 +170,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest { records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20)); // create a test version of the Reads object - TestReads reads = new TestReads(new ArrayList()); - reads.setMaxPileupSize(MAX_READS); + Reads reads = new Reads(new ArrayList()); + JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS); // create the iterator by state with the fake reads and fake records - li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); + li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); // inject the testing version of the locus iterator watcher li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS)); @@ -89,23 +188,6 @@ public class LocusIteratorByStateUnitTest extends BaseTest { } } - -class TestReads extends Reads { - - /** - * Simple constructor for unit testing. - * - * @param readsFiles List of reads files to open. - */ - public TestReads(List readsFiles) { - super(readsFiles); - } - - public void setMaxPileupSize(int maxSize) { - this.maximumReadsAtLocus = maxSize; - } -} - class FakeCloseableIterator implements CloseableIterator { Iterator iterator;