Cleanup for Steve Hershman's issue. In the midst of doing this, I discovered
that the semantics for which reads are in an extended event pileup are not
clear at this point. Eric and I have planned a future clarification for this;
the two of us will discuss who will implement it and when it will happen.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3809 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-07-16 18:57:58 +00:00
parent 6aedede7f3
commit 96034aee0e
6 changed files with 177 additions and 61 deletions

View File

@ -47,7 +47,6 @@ import java.util.*;
public class AlignmentContext {
protected GenomeLoc loc = null;
protected ReadBackedPileup basePileup = null;
protected ReadBackedExtendedEventPileup extendedPileup = null;
private LocusOverflowTracker tracker;
/**
@ -89,10 +88,6 @@ public class AlignmentContext {
this(loc, basePileup, 0);
}
public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup) {
this(loc, extendedPileup, 0);
}
public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, long skippedBases ) {
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
if ( basePileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null");
@ -103,16 +98,6 @@ public class AlignmentContext {
this.skippedBases = skippedBases;
}
public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup, long skippedBases ) {
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
if ( extendedPileup == null ) throw new StingException("BUG: ReadBackedExtendedEventPileup in Alignment context is null");
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
this.loc = loc;
this.extendedPileup = extendedPileup;
this.skippedBases = skippedBases;
}
/** Returns base pileup over the current genomic location. Deprecated. Use getBasePileup() to make your intentions
 * clear.
* @return
@ -130,19 +115,24 @@ public class AlignmentContext {
* only base pileup.
* @return
*/
public ReadBackedExtendedEventPileup getExtendedEventPileup() { return extendedPileup; }
public ReadBackedExtendedEventPileup getExtendedEventPileup() {
if(!hasExtendedEventPileup())
throw new StingException("No extended event pileup is present.");
return (ReadBackedExtendedEventPileup)basePileup;
}
/** Returns true if this alignment context keeps base pileup over the current genomic location.
*
/**
* Returns true if this alignment context keeps base pileup over the current genomic location.
* TODO: Syntax of AlignmentContext uses hasBasePileup() / hasExtendedEventPileup() as an enumeration mechanism. Change this to a more sensible interface.
* @return
*/
public boolean hasBasePileup() { return basePileup != null; }
public boolean hasBasePileup() { return !(basePileup instanceof ReadBackedExtendedEventPileup); }
/** Returns true if this alignment context keeps extended event (indel) pileup over the current genomic location.
*
* @return
*/
public boolean hasExtendedEventPileup() { return extendedPileup != null; }
public boolean hasExtendedEventPileup() { return basePileup instanceof ReadBackedExtendedEventPileup; }
/**
* get all of the reads within this context
@ -151,7 +141,7 @@ public class AlignmentContext {
*/
@Deprecated
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, both can be non-null in theory
public List<SAMRecord> getReads() { return ( basePileup == null ? extendedPileup.getReads() : basePileup.getReads() ); }
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); }
/**
* Are there any reads associated with this locus?
@ -159,7 +149,7 @@ public class AlignmentContext {
* @return
*/
public boolean hasReads() {
return ( (basePileup != null && basePileup.size() > 0) || (extendedPileup != null && extendedPileup.size() > 0) ) ;
return basePileup != null && basePileup.size() > 0 ;
}
/**
@ -167,9 +157,7 @@ public class AlignmentContext {
* @return
*/
public int size() {
if ( basePileup == null && extendedPileup == null ) return 0;
//todo: both pileups can be non-nulls in theory (but not in current usage), what if sizes differ?
return extendedPileup == null ? basePileup.size() : extendedPileup.size();
return basePileup.size();
}
/**
@ -179,8 +167,7 @@ public class AlignmentContext {
*/
@Deprecated
public List<Integer> getOffsets() {
//todo: both pileups can be non-nulls in theory (but not in current usage), what offsets should we return?
return extendedPileup == null ? basePileup.getOffsets() : extendedPileup.getOffsets();
return basePileup.getOffsets();
}
public String getContig() { return getLocation().getContig(); }
@ -188,12 +175,7 @@ public class AlignmentContext {
public GenomeLoc getLocation() { return loc; }
public void downsampleToCoverage(int coverage) {
if ( basePileup != null ) {
basePileup = basePileup.getDownsampledPileup(coverage);
}
if ( extendedPileup != null ) {
extendedPileup = extendedPileup.getDownsampledPileup(coverage);
}
basePileup = basePileup.getDownsampledPileup(coverage);
}
/**

View File

@ -393,6 +393,15 @@ public class LocusIteratorByState extends LocusIterator {
state.getEventBases()) );
} else {
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
// and add in extra reads that start after this indel. Skip these reads.
// My belief at this moment after empirically looking at read->ref alignment is that, in a cigar string
// like 1I76M, the first insertion is between alignment start-1 and alignment start, so we shouldn't be
// filtering these out.
// TODO: UPDATE! Eric tells me that we *might* want reads adjacent to the pileup in the pileup. Strike this block.
//if(state.getRead().getAlignmentStart() > loc.getStart())
// continue;
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
// this read has no indel associated with the previous position on the ref;
// we count this read in only if it has actual bases, not N span...

View File

@ -439,7 +439,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for ( PE p : pileupElementTracker ) {
if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
if ( p.getRead().getMappingQuality() >= minMapQ &&
(p.isDeletion() ||
((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement)p).getType() == ExtendedEventPileupElement.Type.NOEVENT) ||
p.getQual() >= minBaseQ) ) {
filteredTracker.add(p);
}
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.BaseUtils;
import net.sf.samtools.SAMRecord;
import java.util.Arrays;
@ -93,6 +94,29 @@ public class ExtendedEventPileupElement extends PileupElement {
return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength);
}
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
// a valid base quality. The following code attempts to compensate for that.
@Override
public byte getBase() {
return getBase(offset >= 0 ? offset : offset+eventLength);
}
@Override
public int getBaseIndex() {
return getBaseIndex(offset >= 0 ? offset : offset+eventLength);
}
@Override
public byte getSecondBase() {
return getSecondBase(offset >= 0 ? offset : offset+eventLength);
}
@Override
public byte getQual() {
return getQual(offset >= 0 ? offset : offset+eventLength);
}
/** Returns length of the event (number of inserted or deleted bases) */
public int getEventLength() { return eventLength; }

View File

@ -36,19 +36,19 @@ public class PileupElement {
public int getOffset() { return offset; }
public byte getBase() {
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
return getBase(offset);
}
public int getBaseIndex() {
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
return getBaseIndex(offset);
}
public byte getSecondBase() {
return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset);
return getSecondBase(offset);
}
public byte getQual() {
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
return getQual(offset);
}
public int getMappingQual() { return read.getMappingQuality(); }
@ -56,4 +56,20 @@ public class PileupElement {
public String toString() {
return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual());
}
protected byte getBase(final int offset) {
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
}
protected int getBaseIndex(final int offset) {
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
}
protected byte getSecondBase(final int offset) {
return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset);
}
protected byte getQual(final int offset) {
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
}
}

View File

@ -11,6 +11,8 @@ import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Before;
import org.junit.BeforeClass;
@ -20,6 +22,7 @@ import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Arrays;
/**
* testing of the LocusIteratorByState
@ -36,6 +39,102 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
}
@Test
public void testIndelBaseQualityFiltering() {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// create a test version of the Reads object
Reads readAttributes = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
before.setReadBases(bases);
before.setBaseQualities(new byte[] {20,20,20,20,0,20,20,20,20,20});
before.setCigarString("10M");
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
during.setReadBases(bases);
during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20});
during.setCigarString("4M1I6M");
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
after.setReadBases(bases);
after.setBaseQualities(new byte[] {20,20,0,20,20,20,20,20,20,20});
after.setCigarString("10M");
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
AlignmentContext context = li.next();
if(!context.hasExtendedEventPileup())
continue;
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup().getBaseFilteredPileup(10);
Assert.assertEquals("Extended event pileup at wrong location",5,pileup.getLocation().getStart());
Assert.assertEquals("Pileup size is incorrect",3,pileup.size());
foundExtendedEventPileup = true;
}
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
}
/**
* Right now, the GATK's extended event pileup DOES NOT include reads which stop immediately before an insertion
* but DOES include reads which stop immediately after an insertion. This is almost certainly WRONG. Eric is
* figuring out the right way to handle this; in the meantime, adding this test to monitor that:
* A) the behavior is consistent
* B) so that we do end up with an automated test for this case when the model is fixed.
*/
@Test
public void testIndelPileupContainsAbuttingReads() {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
// create a test version of the Reads object
Reads readAttributes = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
before.setReadBases(bases);
before.setBaseQualities(quals);
before.setCigarString("10M");
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,6,10);
during.setReadBases(bases);
during.setBaseQualities(quals);
during.setCigarString("5M1I5M");
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,11,10);
after.setReadBases(bases);
after.setBaseQualities(quals);
after.setCigarString("10M");
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
AlignmentContext context = li.next();
if(!context.hasExtendedEventPileup())
continue;
Assert.assertEquals("Extended event pileup at wrong location",10,context.getLocation().getStart());
Assert.assertEquals("Pileup size is incorrect",2,context.size());
Assert.assertEquals("Read in pileup is incorrect",during,context.getExtendedEventPileup().getReads().get(0));
Assert.assertEquals("Read in pileup is incorrect",after,context.getExtendedEventPileup().getReads().get(1));
foundExtendedEventPileup = true;
}
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
}
@Test
public void testBasicWarnings() {
@ -45,11 +144,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20));
// create a test version of the Reads object
TestReads reads = new TestReads(new ArrayList<File>());
reads.setMaxPileupSize(MAX_READS);
Reads reads = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
// inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -71,11 +170,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20));
// create a test version of the Reads object
TestReads reads = new TestReads(new ArrayList<File>());
reads.setMaxPileupSize(MAX_READS);
Reads reads = new Reads(new ArrayList<File>());
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
// inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -89,23 +188,6 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
}
}
class TestReads extends Reads {
/**
* Simple constructor for unit testing.
*
* @param readsFiles List of reads files to open.
*/
public TestReads(List<File> readsFiles) {
super(readsFiles);
}
public void setMaxPileupSize(int maxSize) {
this.maximumReadsAtLocus = maxSize;
}
}
class FakeCloseableIterator<T> implements CloseableIterator<T> {
Iterator<T> iterator;