Cleanup for Steve Hershman's issue. In the midst of doing this, I discovered
that the semantics for which reads are in an extended event pileup are not
clear at this point. Eric and I have planned a future clarification for this,
and the two of us will discuss who will implement this clarification and when
it will happen.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3809 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-07-16 18:57:58 +00:00
parent 6aedede7f3
commit 96034aee0e
6 changed files with 177 additions and 61 deletions

View File

@ -47,7 +47,6 @@ import java.util.*;
public class AlignmentContext { public class AlignmentContext {
protected GenomeLoc loc = null; protected GenomeLoc loc = null;
protected ReadBackedPileup basePileup = null; protected ReadBackedPileup basePileup = null;
protected ReadBackedExtendedEventPileup extendedPileup = null;
private LocusOverflowTracker tracker; private LocusOverflowTracker tracker;
/** /**
@ -89,10 +88,6 @@ public class AlignmentContext {
this(loc, basePileup, 0); this(loc, basePileup, 0);
} }
public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup) {
this(loc, extendedPileup, 0);
}
public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, long skippedBases ) { public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, long skippedBases ) {
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null"); if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
if ( basePileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null"); if ( basePileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null");
@ -103,16 +98,6 @@ public class AlignmentContext {
this.skippedBases = skippedBases; this.skippedBases = skippedBases;
} }
public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup, long skippedBases ) {
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
if ( extendedPileup == null ) throw new StingException("BUG: ReadBackedExtendedEventPileup in Alignment context is null");
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
this.loc = loc;
this.extendedPileup = extendedPileup;
this.skippedBases = skippedBases;
}
/** Returns base pileup over the current genomic location. Deprecated. Use getBasePileup() to make your intentions /** Returns base pileup over the current genomic location. Deprecated. Use getBasePileup() to make your intentions
* clear. * clear.
* @return * @return
@ -130,19 +115,24 @@ public class AlignmentContext {
* only base pileup. * only base pileup.
* @return * @return
*/ */
public ReadBackedExtendedEventPileup getExtendedEventPileup() { return extendedPileup; } public ReadBackedExtendedEventPileup getExtendedEventPileup() {
if(!hasExtendedEventPileup())
throw new StingException("No extended event pileup is present.");
return (ReadBackedExtendedEventPileup)basePileup;
}
/** Returns true if this alignment context keeps base pileup over the current genomic location. /**
* * Returns true if this alignment context keeps base pileup over the current genomic location.
* TODO: Syntax of AlignmentContext uses hasBasePileup() / hasExtendedEventPileup() as an enumeration mechanism. Change this to a more sensible interface.
* @return * @return
*/ */
public boolean hasBasePileup() { return basePileup != null; } public boolean hasBasePileup() { return !(basePileup instanceof ReadBackedExtendedEventPileup); }
/** Returns true if this alignment context keeps extended event (indel) pileup over the current genomic location. /** Returns true if this alignment context keeps extended event (indel) pileup over the current genomic location.
* *
* @return * @return
*/ */
public boolean hasExtendedEventPileup() { return extendedPileup != null; } public boolean hasExtendedEventPileup() { return basePileup instanceof ReadBackedExtendedEventPileup; }
/** /**
* get all of the reads within this context * get all of the reads within this context
@ -151,7 +141,7 @@ public class AlignmentContext {
*/ */
@Deprecated @Deprecated
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, both can be non-null in theory //todo: unsafe and tailored for current usage only; both pileups can be null or worse, both can be non-null in theory
public List<SAMRecord> getReads() { return ( basePileup == null ? extendedPileup.getReads() : basePileup.getReads() ); } public List<SAMRecord> getReads() { return ( basePileup.getReads() ); }
/** /**
* Are there any reads associated with this locus? * Are there any reads associated with this locus?
@ -159,7 +149,7 @@ public class AlignmentContext {
* @return * @return
*/ */
public boolean hasReads() { public boolean hasReads() {
return ( (basePileup != null && basePileup.size() > 0) || (extendedPileup != null && extendedPileup.size() > 0) ) ; return basePileup != null && basePileup.size() > 0 ;
} }
/** /**
@ -167,9 +157,7 @@ public class AlignmentContext {
* @return * @return
*/ */
public int size() { public int size() {
if ( basePileup == null && extendedPileup == null ) return 0; return basePileup.size();
//todo: both pileups can be non-nulls in theory (but not in current usage), what if sizes differ?
return extendedPileup == null ? basePileup.size() : extendedPileup.size();
} }
/** /**
@ -179,8 +167,7 @@ public class AlignmentContext {
*/ */
@Deprecated @Deprecated
public List<Integer> getOffsets() { public List<Integer> getOffsets() {
//todo: both pileups can be non-nulls in theory (but not in current usage), what offsets should we return? return basePileup.getOffsets();
return extendedPileup == null ? basePileup.getOffsets() : extendedPileup.getOffsets();
} }
public String getContig() { return getLocation().getContig(); } public String getContig() { return getLocation().getContig(); }
@ -188,13 +175,8 @@ public class AlignmentContext {
public GenomeLoc getLocation() { return loc; } public GenomeLoc getLocation() { return loc; }
public void downsampleToCoverage(int coverage) { public void downsampleToCoverage(int coverage) {
if ( basePileup != null ) {
basePileup = basePileup.getDownsampledPileup(coverage); basePileup = basePileup.getDownsampledPileup(coverage);
} }
if ( extendedPileup != null ) {
extendedPileup = extendedPileup.getDownsampledPileup(coverage);
}
}
/** /**
* Returns the number of bases we've skipped over in the reference since the last map invocation. * Returns the number of bases we've skipped over in the reference since the last map invocation.

View File

@ -393,6 +393,15 @@ public class LocusIteratorByState extends LocusIterator {
state.getEventBases()) ); state.getEventBases()) );
} else { } else {
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
// and add in extra reads that start after this indel. Skip these reads.
// My belief at this moment after empirically looking at read->ref alignment is that, in a cigar string
// like 1I76M, the first insertion is between alignment start-1 and alignment start, so we shouldn't be
// filtering these out.
// TODO: UPDATE! Eric tells me that we *might* want reads adjacent to the pileup in the pileup. Strike this block.
//if(state.getRead().getAlignmentStart() > loc.getStart())
// continue;
if ( state.getCurrentCigarOperator() != CigarOperator.N ) { if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
// this read has no indel associated with the previous position on the ref; // this read has no indel associated with the previous position on the ref;
// we count this read in only if it has actual bases, not N span... // we count this read in only if it has actual bases, not N span...

View File

@ -439,7 +439,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>(); UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for ( PE p : pileupElementTracker ) { for ( PE p : pileupElementTracker ) {
if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) { if ( p.getRead().getMappingQuality() >= minMapQ &&
(p.isDeletion() ||
((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement)p).getType() == ExtendedEventPileupElement.Type.NOEVENT) ||
p.getQual() >= minBaseQ) ) {
filteredTracker.add(p); filteredTracker.add(p);
} }
} }

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.BaseUtils;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import java.util.Arrays; import java.util.Arrays;
@ -93,6 +94,29 @@ public class ExtendedEventPileupElement extends PileupElement {
return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength); return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength);
} }
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
// a valid base quality. The following code attempts to compensate for that.
@Override
public byte getBase() {
return getBase(offset >= 0 ? offset : offset+eventLength);
}
@Override
public int getBaseIndex() {
return getBaseIndex(offset >= 0 ? offset : offset+eventLength);
}
@Override
public byte getSecondBase() {
return getSecondBase(offset >= 0 ? offset : offset+eventLength);
}
@Override
public byte getQual() {
return getQual(offset >= 0 ? offset : offset+eventLength);
}
/** Returns length of the event (number of inserted or deleted bases) */ /** Returns length of the event (number of inserted or deleted bases) */
public int getEventLength() { return eventLength; } public int getEventLength() { return eventLength; }

View File

@ -36,19 +36,19 @@ public class PileupElement {
public int getOffset() { return offset; } public int getOffset() { return offset; }
public byte getBase() { public byte getBase() {
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset]; return getBase(offset);
} }
public int getBaseIndex() { public int getBaseIndex() {
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]); return getBaseIndex(offset);
} }
public byte getSecondBase() { public byte getSecondBase() {
return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset); return getSecondBase(offset);
} }
public byte getQual() { public byte getQual() {
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; return getQual(offset);
} }
public int getMappingQual() { return read.getMappingQuality(); } public int getMappingQual() { return read.getMappingQuality(); }
@ -56,4 +56,20 @@ public class PileupElement {
public String toString() { public String toString() {
return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual()); return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual());
} }
protected byte getBase(final int offset) {
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
}
protected int getBaseIndex(final int offset) {
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
}
protected byte getSecondBase(final int offset) {
return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset);
}
protected byte getQual(final int offset) {
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
}
} }

View File

@ -11,6 +11,8 @@ import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -20,6 +22,7 @@ import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Arrays;
/** /**
* testing of the LocusIteratorByState * testing of the LocusIteratorByState
@ -36,6 +39,102 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
} }
@Test
public void testIndelBaseQualityFiltering() {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// create a test version of the Reads object
Reads readAttributes = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
before.setReadBases(bases);
before.setBaseQualities(new byte[] {20,20,20,20,0,20,20,20,20,20});
before.setCigarString("10M");
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
during.setReadBases(bases);
during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20});
during.setCigarString("4M1I6M");
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
after.setReadBases(bases);
after.setBaseQualities(new byte[] {20,20,0,20,20,20,20,20,20,20});
after.setCigarString("10M");
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
AlignmentContext context = li.next();
if(!context.hasExtendedEventPileup())
continue;
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup().getBaseFilteredPileup(10);
Assert.assertEquals("Extended event pileup at wrong location",5,pileup.getLocation().getStart());
Assert.assertEquals("Pileup size is incorrect",3,pileup.size());
foundExtendedEventPileup = true;
}
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
}
/**
* Right now, the GATK's extended event pileup DOES NOT include reads which stop immediately before an insertion
* but DOES include reads which stop immediately after an insertion. This is almost certainly WRONG. Eric is
* figuring out the right way to handle this; in the meantime, adding this test to monitor that:
* A) the behavior is consistent
* B) so that we do end up with an automated test for this case when the model is fixed.
*/
@Test
public void testIndelPileupContainsAbuttingReads() {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
// create a test version of the Reads object
Reads readAttributes = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
before.setReadBases(bases);
before.setBaseQualities(quals);
before.setCigarString("10M");
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,6,10);
during.setReadBases(bases);
during.setBaseQualities(quals);
during.setCigarString("5M1I5M");
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,11,10);
after.setReadBases(bases);
after.setBaseQualities(quals);
after.setCigarString("10M");
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
AlignmentContext context = li.next();
if(!context.hasExtendedEventPileup())
continue;
Assert.assertEquals("Extended event pileup at wrong location",10,context.getLocation().getStart());
Assert.assertEquals("Pileup size is incorrect",2,context.size());
Assert.assertEquals("Read in pileup is incorrect",during,context.getExtendedEventPileup().getReads().get(0));
Assert.assertEquals("Read in pileup is incorrect",after,context.getExtendedEventPileup().getReads().get(1));
foundExtendedEventPileup = true;
}
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
}
@Test @Test
public void testBasicWarnings() { public void testBasicWarnings() {
@ -45,11 +144,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20)); records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20));
// create a test version of the Reads object // create a test version of the Reads object
TestReads reads = new TestReads(new ArrayList<File>()); Reads reads = new Reads(new ArrayList<File>());
reads.setMaxPileupSize(MAX_READS); JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
// inject the testing version of the locus iterator watcher // inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS)); li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -71,11 +170,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20)); records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20));
// create a test version of the Reads object // create a test version of the Reads object
TestReads reads = new TestReads(new ArrayList<File>()); Reads reads = new Reads(new ArrayList<File>());
reads.setMaxPileupSize(MAX_READS); JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads); li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
// inject the testing version of the locus iterator watcher // inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS)); li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -89,23 +188,6 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
} }
} }
class TestReads extends Reads {
/**
* Simple constructor for unit testing.
*
* @param readsFiles List of reads files to open.
*/
public TestReads(List<File> readsFiles) {
super(readsFiles);
}
public void setMaxPileupSize(int maxSize) {
this.maximumReadsAtLocus = maxSize;
}
}
class FakeCloseableIterator<T> implements CloseableIterator<T> { class FakeCloseableIterator<T> implements CloseableIterator<T> {
Iterator<T> iterator; Iterator<T> iterator;