Cleanup for Steve Hershman's issue. In the midst of doing this, I discovered
that the semantics for which reads are in an extended event pileup are not clear at this point. Eric and I have planned a future clarification for this and the two of us will discuss who will implement this clarification and when it'll happen. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3809 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6aedede7f3
commit
96034aee0e
|
|
@ -47,7 +47,6 @@ import java.util.*;
|
|||
public class AlignmentContext {
|
||||
protected GenomeLoc loc = null;
|
||||
protected ReadBackedPileup basePileup = null;
|
||||
protected ReadBackedExtendedEventPileup extendedPileup = null;
|
||||
private LocusOverflowTracker tracker;
|
||||
|
||||
/**
|
||||
|
|
@ -89,10 +88,6 @@ public class AlignmentContext {
|
|||
this(loc, basePileup, 0);
|
||||
}
|
||||
|
||||
public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup) {
|
||||
this(loc, extendedPileup, 0);
|
||||
}
|
||||
|
||||
public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, long skippedBases ) {
|
||||
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
|
||||
if ( basePileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null");
|
||||
|
|
@ -103,16 +98,6 @@ public class AlignmentContext {
|
|||
this.skippedBases = skippedBases;
|
||||
}
|
||||
|
||||
public AlignmentContext(GenomeLoc loc, ReadBackedExtendedEventPileup extendedPileup, long skippedBases ) {
|
||||
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
|
||||
if ( extendedPileup == null ) throw new StingException("BUG: ReadBackedExtendedEventPileup in Alignment context is null");
|
||||
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
|
||||
|
||||
this.loc = loc;
|
||||
this.extendedPileup = extendedPileup;
|
||||
this.skippedBases = skippedBases;
|
||||
}
|
||||
|
||||
/** Returns base pileup over the current genomic location. Deprectated. Use getBasePileup() to make your intentions
|
||||
* clear.
|
||||
* @return
|
||||
|
|
@ -130,19 +115,24 @@ public class AlignmentContext {
|
|||
* only base pileup.
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedExtendedEventPileup getExtendedEventPileup() { return extendedPileup; }
|
||||
public ReadBackedExtendedEventPileup getExtendedEventPileup() {
|
||||
if(!hasExtendedEventPileup())
|
||||
throw new StingException("No extended event pileup is present.");
|
||||
return (ReadBackedExtendedEventPileup)basePileup;
|
||||
}
|
||||
|
||||
/** Returns true if this alignment context keeps base pileup over the current genomic location.
|
||||
*
|
||||
/**
|
||||
* Returns true if this alignment context keeps base pileup over the current genomic location.
|
||||
* TODO: Syntax of AlignmentContext uses hasBasePileup() / hasExtendedEventPileup() as an enumeration mechanism. Change this to a more sensible interface.
|
||||
* @return
|
||||
*/
|
||||
public boolean hasBasePileup() { return basePileup != null; }
|
||||
public boolean hasBasePileup() { return !(basePileup instanceof ReadBackedExtendedEventPileup); }
|
||||
|
||||
/** Returns true if this alignment context keeps extended event (indel) pileup over the current genomic location.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hasExtendedEventPileup() { return extendedPileup != null; }
|
||||
public boolean hasExtendedEventPileup() { return basePileup instanceof ReadBackedExtendedEventPileup; }
|
||||
|
||||
/**
|
||||
* get all of the reads within this context
|
||||
|
|
@ -151,7 +141,7 @@ public class AlignmentContext {
|
|||
*/
|
||||
@Deprecated
|
||||
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory
|
||||
public List<SAMRecord> getReads() { return ( basePileup == null ? extendedPileup.getReads() : basePileup.getReads() ); }
|
||||
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); }
|
||||
|
||||
/**
|
||||
* Are there any reads associated with this locus?
|
||||
|
|
@ -159,7 +149,7 @@ public class AlignmentContext {
|
|||
* @return
|
||||
*/
|
||||
public boolean hasReads() {
|
||||
return ( (basePileup != null && basePileup.size() > 0) || (extendedPileup != null && extendedPileup.size() > 0) ) ;
|
||||
return basePileup != null && basePileup.size() > 0 ;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -167,9 +157,7 @@ public class AlignmentContext {
|
|||
* @return
|
||||
*/
|
||||
public int size() {
|
||||
if ( basePileup == null && extendedPileup == null ) return 0;
|
||||
//todo: both pileups can be non-nulls in theory (but not in current usage), what if sizes differ?
|
||||
return extendedPileup == null ? basePileup.size() : extendedPileup.size();
|
||||
return basePileup.size();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -179,8 +167,7 @@ public class AlignmentContext {
|
|||
*/
|
||||
@Deprecated
|
||||
public List<Integer> getOffsets() {
|
||||
//todo: both pileups can be non-nulls in theory (but not in current usage), what offsets should we return?
|
||||
return extendedPileup == null ? basePileup.getOffsets() : extendedPileup.getOffsets();
|
||||
return basePileup.getOffsets();
|
||||
}
|
||||
|
||||
public String getContig() { return getLocation().getContig(); }
|
||||
|
|
@ -188,12 +175,7 @@ public class AlignmentContext {
|
|||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
public void downsampleToCoverage(int coverage) {
|
||||
if ( basePileup != null ) {
|
||||
basePileup = basePileup.getDownsampledPileup(coverage);
|
||||
}
|
||||
if ( extendedPileup != null ) {
|
||||
extendedPileup = extendedPileup.getDownsampledPileup(coverage);
|
||||
}
|
||||
basePileup = basePileup.getDownsampledPileup(coverage);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -393,6 +393,15 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
state.getEventBases()) );
|
||||
|
||||
} else {
|
||||
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
|
||||
// and add in extra reads that start after this indel. Skip these reads.
|
||||
// My belief at this moment after empirically looking at read->ref alignment is that, in a cigar string
|
||||
// like 1I76M, the first insertion is between alignment start-1 and alignment start, so we shouldn't be
|
||||
// filtering these out.
|
||||
// TODO: UPDATE! Eric tells me that we *might* want reads adjacent to the pileup in the pileup. Strike this block.
|
||||
//if(state.getRead().getAlignmentStart() > loc.getStart())
|
||||
// continue;
|
||||
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
// this read has no indel associated with the previous position on the ref;
|
||||
// we count this read in only if it has actual bases, not N span...
|
||||
|
|
|
|||
|
|
@ -439,7 +439,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
for ( PE p : pileupElementTracker ) {
|
||||
if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
|
||||
if ( p.getRead().getMappingQuality() >= minMapQ &&
|
||||
(p.isDeletion() ||
|
||||
((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement)p).getType() == ExtendedEventPileupElement.Type.NOEVENT) ||
|
||||
p.getQual() >= minBaseQ) ) {
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.pileup;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -93,6 +94,29 @@ public class ExtendedEventPileupElement extends PileupElement {
|
|||
return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength);
|
||||
}
|
||||
|
||||
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
|
||||
// a valid base quality. The following code attempts to compensate for that.'
|
||||
|
||||
@Override
|
||||
public byte getBase() {
|
||||
return getBase(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBaseIndex() {
|
||||
return getBaseIndex(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getSecondBase() {
|
||||
return getSecondBase(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getQual() {
|
||||
return getQual(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
/** Returns length of the event (number of inserted or deleted bases */
|
||||
public int getEventLength() { return eventLength; }
|
||||
|
||||
|
|
|
|||
|
|
@ -36,19 +36,19 @@ public class PileupElement {
|
|||
public int getOffset() { return offset; }
|
||||
|
||||
public byte getBase() {
|
||||
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
|
||||
return getBase(offset);
|
||||
}
|
||||
|
||||
public int getBaseIndex() {
|
||||
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
|
||||
return getBaseIndex(offset);
|
||||
}
|
||||
|
||||
public byte getSecondBase() {
|
||||
return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset);
|
||||
return getSecondBase(offset);
|
||||
}
|
||||
|
||||
public byte getQual() {
|
||||
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
|
||||
return getQual(offset);
|
||||
}
|
||||
|
||||
public int getMappingQual() { return read.getMappingQuality(); }
|
||||
|
|
@ -56,4 +56,20 @@ public class PileupElement {
|
|||
public String toString() {
|
||||
return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual());
|
||||
}
|
||||
|
||||
protected byte getBase(final int offset) {
|
||||
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
|
||||
}
|
||||
|
||||
protected int getBaseIndex(final int offset) {
|
||||
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
|
||||
}
|
||||
|
||||
protected byte getSecondBase(final int offset) {
|
||||
return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset);
|
||||
}
|
||||
|
||||
protected byte getQual(final int offset) {
|
||||
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
|
||||
}
|
||||
}
|
||||
|
|
@ -11,6 +11,8 @@ import org.broadinstitute.sting.gatk.Reads;
|
|||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
|
@ -20,6 +22,7 @@ import java.io.File;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* testing of the LocusIteratorByState
|
||||
|
|
@ -36,6 +39,102 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndelBaseQualityFiltering() {
|
||||
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
|
||||
// create a test version of the Reads object
|
||||
Reads readAttributes = new Reads(new ArrayList<File>());
|
||||
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
|
||||
|
||||
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
|
||||
before.setReadBases(bases);
|
||||
before.setBaseQualities(new byte[] {20,20,20,20,0,20,20,20,20,20});
|
||||
before.setCigarString("10M");
|
||||
|
||||
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
|
||||
during.setReadBases(bases);
|
||||
during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20});
|
||||
during.setCigarString("4M1I6M");
|
||||
|
||||
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
|
||||
after.setReadBases(bases);
|
||||
after.setBaseQualities(new byte[] {20,20,0,20,20,20,20,20,20,20});
|
||||
after.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(before,during,after);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes);
|
||||
|
||||
boolean foundExtendedEventPileup = false;
|
||||
while (li.hasNext()) {
|
||||
AlignmentContext context = li.next();
|
||||
if(!context.hasExtendedEventPileup())
|
||||
continue;
|
||||
|
||||
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup().getBaseFilteredPileup(10);
|
||||
Assert.assertEquals("Extended event pileup at wrong location",5,pileup.getLocation().getStart());
|
||||
Assert.assertEquals("Pileup size is incorrect",3,pileup.size());
|
||||
|
||||
foundExtendedEventPileup = true;
|
||||
}
|
||||
|
||||
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Right now, the GATK's extended event pileup DOES NOT include reads which stop immediately before an insertion
|
||||
* but DOES include reads which stop immediately after an insertion. This is almost certainly WRONG. Eric is
|
||||
* figuring out the right way to handle this; in the meantime, adding this test to monitor that:
|
||||
* A) the behavior is consistent
|
||||
* B) so that we do end up with an automated test for this case when the model is fixed.
|
||||
*/
|
||||
@Test
|
||||
public void testIndelPileupContainsAbuttingReads() {
|
||||
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
|
||||
|
||||
// create a test version of the Reads object
|
||||
Reads readAttributes = new Reads(new ArrayList<File>());
|
||||
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"generateExtendedEvents"),readAttributes,true);
|
||||
|
||||
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
|
||||
before.setReadBases(bases);
|
||||
before.setBaseQualities(quals);
|
||||
before.setCigarString("10M");
|
||||
|
||||
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,6,10);
|
||||
during.setReadBases(bases);
|
||||
during.setBaseQualities(quals);
|
||||
during.setCigarString("5M1I5M");
|
||||
|
||||
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,11,10);
|
||||
after.setReadBases(bases);
|
||||
after.setBaseQualities(quals);
|
||||
after.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(before,during,after);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes);
|
||||
|
||||
boolean foundExtendedEventPileup = false;
|
||||
while (li.hasNext()) {
|
||||
AlignmentContext context = li.next();
|
||||
if(!context.hasExtendedEventPileup())
|
||||
continue;
|
||||
|
||||
Assert.assertEquals("Extended event pileup at wrong location",10,context.getLocation().getStart());
|
||||
Assert.assertEquals("Pileup size is incorrect",2,context.size());
|
||||
Assert.assertEquals("Read in pileup is incorrect",during,context.getExtendedEventPileup().getReads().get(0));
|
||||
Assert.assertEquals("Read in pileup is incorrect",after,context.getExtendedEventPileup().getReads().get(1));
|
||||
|
||||
foundExtendedEventPileup = true;
|
||||
}
|
||||
|
||||
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicWarnings() {
|
||||
|
|
@ -45,11 +144,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20));
|
||||
|
||||
// create a test version of the Reads object
|
||||
TestReads reads = new TestReads(new ArrayList<File>());
|
||||
reads.setMaxPileupSize(MAX_READS);
|
||||
Reads reads = new Reads(new ArrayList<File>());
|
||||
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
|
||||
|
||||
// inject the testing version of the locus iterator watcher
|
||||
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
|
||||
|
|
@ -71,11 +170,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20));
|
||||
|
||||
// create a test version of the Reads object
|
||||
TestReads reads = new TestReads(new ArrayList<File>());
|
||||
reads.setMaxPileupSize(MAX_READS);
|
||||
Reads reads = new Reads(new ArrayList<File>());
|
||||
JVMUtils.setFieldValue(JVMUtils.findField(Reads.class,"maximumReadsAtLocus"),reads,MAX_READS);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(records.iterator()), reads);
|
||||
|
||||
// inject the testing version of the locus iterator watcher
|
||||
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
|
||||
|
|
@ -89,23 +188,6 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
class TestReads extends Reads {
|
||||
|
||||
/**
|
||||
* Simple constructor for unit testing.
|
||||
*
|
||||
* @param readsFiles List of reads files to open.
|
||||
*/
|
||||
public TestReads(List<File> readsFiles) {
|
||||
super(readsFiles);
|
||||
}
|
||||
|
||||
public void setMaxPileupSize(int maxSize) {
|
||||
this.maximumReadsAtLocus = maxSize;
|
||||
}
|
||||
}
|
||||
|
||||
class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue