Refactor the Pileup Element with regards to indels
Eric reported this bug due to the reduced reads failing with an index out of bounds on what we thought was a deletion, but turned out to be a read starting with insertion. * Refactored PileupElement to distinguish clearly between deletions and read starting with insertion * Modified ExtendedEventPileup to correctly distinguish elements with deletion when creating new pileups * Refactored most of the lazyLoadNextAlignment() function of the LocusIteratorByState for clarity and to create clear separation between what is a pileup with a deletion and what's not one. Got rid of many useless if statements. * Changed the way LocusIteratorByState creates extended event pileups to differentiate between insertions in the beginning of the read and deletions. * Every deletion now has an offset (start of the event) * Fixed bug when LocusITeratorByState found a read starting with insertion that happened to be a reduced read. * Separated the definitions of deletion/insertion (in the beginning of the read) in all UG annotations (and the annotator engine). * Pileup depth of coverage for a deleted base will now return the average coverage around the deletion. * Indel ReadPositionRankSum test now uses the deletion true offset from the read, changed all appropriate md5's * The extra pileup elements now properly read by the Indel mode of the UG made any subsequent call have a different random number and therefore all RankSum tests have slightly different values (in the 10^-3 range). Updated all appropriate md5s after extremely careful inspection -- Thanks Ryan! phew!
This commit is contained in:
parent
4aacaf8916
commit
ffd61f4c1c
|
|
@ -23,7 +23,7 @@ import java.util.NoSuchElementException;
|
|||
*/
|
||||
|
||||
/**
|
||||
* A LocusView over which the user can iterate.
|
||||
* A LocusView over which the user can iterate.
|
||||
*/
|
||||
|
||||
public class AllLocusView extends LocusView {
|
||||
|
|
@ -47,12 +47,13 @@ public class AllLocusView extends LocusView {
|
|||
|
||||
/**
|
||||
* Create a new queue of locus contexts.
|
||||
*
|
||||
* @param provider
|
||||
*/
|
||||
public AllLocusView(LocusShardDataProvider provider) {
|
||||
super( provider );
|
||||
public AllLocusView(LocusShardDataProvider provider) {
|
||||
super(provider);
|
||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||
locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus());
|
||||
locusIterator = new GenomeLocusIterator(genomeLocParser, provider.getLocus());
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
@ -63,7 +64,7 @@ public class AllLocusView extends LocusView {
|
|||
public AlignmentContext next() {
|
||||
advance();
|
||||
|
||||
if(nextPosition == null)
|
||||
if (nextPosition == null)
|
||||
throw new NoSuchElementException("No next is available in the all locus view");
|
||||
|
||||
// Flag to the iterator that no data is waiting in the queue to be processed.
|
||||
|
|
@ -72,7 +73,7 @@ public class AllLocusView extends LocusView {
|
|||
AlignmentContext currentLocus;
|
||||
|
||||
// If actual data is present, return it. Otherwise, return empty data.
|
||||
if( nextLocus != null && nextLocus.getLocation().equals(nextPosition) )
|
||||
if (nextLocus != null && nextLocus.getLocation().equals(nextPosition))
|
||||
currentLocus = nextLocus;
|
||||
else
|
||||
currentLocus = createEmptyLocus(nextPosition);
|
||||
|
|
@ -82,15 +83,15 @@ public class AllLocusView extends LocusView {
|
|||
|
||||
private void advance() {
|
||||
// Already at the next element? Don't move forward.
|
||||
if(atNextElement)
|
||||
if (atNextElement)
|
||||
return;
|
||||
|
||||
// Out of elements?
|
||||
if(nextPosition == null && !locusIterator.hasNext())
|
||||
return;
|
||||
if (nextPosition == null && !locusIterator.hasNext())
|
||||
return;
|
||||
|
||||
// If nextLocus has been consumed, clear it out to make room for the next incoming locus.
|
||||
if(nextPosition != null && nextLocus != null && !nextLocus.getLocation().isPast(nextPosition)) {
|
||||
if (nextPosition != null && nextLocus != null && !nextLocus.getLocation().isPast(nextPosition)) {
|
||||
nextLocus = null;
|
||||
|
||||
// Determine the next locus. The trick is that we may have more than one alignment context at the same
|
||||
|
|
@ -98,9 +99,9 @@ public class AllLocusView extends LocusView {
|
|||
// is still at the current position, we do not increment current position and wait for next call to next() to return
|
||||
// that context. If we know that next context is past the current position, we are done with current
|
||||
// position
|
||||
if(hasNextLocus()) {
|
||||
if (hasNextLocus()) {
|
||||
nextLocus = nextLocus();
|
||||
if(nextPosition.equals(nextLocus.getLocation())) {
|
||||
if (nextPosition.equals(nextLocus.getLocation())) {
|
||||
atNextElement = true;
|
||||
return;
|
||||
}
|
||||
|
|
@ -108,7 +109,7 @@ public class AllLocusView extends LocusView {
|
|||
}
|
||||
|
||||
// No elements left in queue? Clear out the position state tracker and return.
|
||||
if(!locusIterator.hasNext()) {
|
||||
if (!locusIterator.hasNext()) {
|
||||
nextPosition = null;
|
||||
return;
|
||||
}
|
||||
|
|
@ -119,9 +120,9 @@ public class AllLocusView extends LocusView {
|
|||
|
||||
// Crank the iterator to (if possible) or past the next context. Be careful not to hold a reference to nextLocus
|
||||
// while using the hasNextLocus() / nextLocus() machinery; this will cause us to use more memory than is optimal.
|
||||
while(nextLocus == null || nextLocus.getLocation().isBefore(nextPosition)) {
|
||||
while (nextLocus == null || nextLocus.getLocation().isBefore(nextPosition)) {
|
||||
nextLocus = null;
|
||||
if(!hasNextLocus())
|
||||
if (!hasNextLocus())
|
||||
break;
|
||||
nextLocus = nextLocus();
|
||||
}
|
||||
|
|
@ -129,12 +130,15 @@ public class AllLocusView extends LocusView {
|
|||
|
||||
/**
|
||||
* Creates a blank locus context at the specified location.
|
||||
*
|
||||
* @param site Site at which to create the blank locus context.
|
||||
* @return empty context.
|
||||
*/
|
||||
private final static List<GATKSAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
|
||||
private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList();
|
||||
private AlignmentContext createEmptyLocus( GenomeLoc site ) {
|
||||
return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));
|
||||
private final static List<Boolean> EMPTY_DELETION_STATUS = Collections.emptyList();
|
||||
|
||||
private AlignmentContext createEmptyLocus(GenomeLoc site) {
|
||||
return new AlignmentContext(site, new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,9 +49,13 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
|
||||
/**
|
||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||
*/
|
||||
public class LocusIteratorByState extends LocusIterator {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -92,12 +96,14 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases?
|
||||
// the only purpose of this flag is to shield away a few additional lines of code
|
||||
// when extended piles are not needed, it may not be even worth it...
|
||||
byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels)
|
||||
int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events
|
||||
byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the
|
||||
|
||||
byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels)
|
||||
int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events
|
||||
byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the
|
||||
// current base on the ref. We use a counter-like variable here since clearing the indel event is
|
||||
// delayed by one base, so we need to remember how long ago we have seen the actual event
|
||||
int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the
|
||||
|
||||
int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the
|
||||
// event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly,
|
||||
// we cache it here mainly for convenience
|
||||
|
||||
|
|
@ -111,23 +117,31 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//System.out.printf("Creating a SAMRecordState: %s%n", this);
|
||||
}
|
||||
|
||||
public SAMRecord getRead() { return read; }
|
||||
public SAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is our current offset in the read's bases that aligns us with the reference genome?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getReadOffset() { return readOffset; }
|
||||
public int getReadOffset() {
|
||||
return readOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getGenomeOffset() { return genomeOffset; }
|
||||
public int getGenomeOffset() {
|
||||
return genomeOffset;
|
||||
}
|
||||
|
||||
public int getGenomePosition() { return read.getAlignmentStart() + getGenomeOffset(); }
|
||||
public int getGenomePosition() {
|
||||
return read.getAlignmentStart() + getGenomeOffset();
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
|
||||
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||
|
|
@ -137,19 +151,26 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
return curElement.getOperator();
|
||||
}
|
||||
|
||||
/** Returns true if we just stepped over insertion/into a deletion prior to the last return from stepForwardOnGenome.
|
||||
/**
|
||||
* Returns true if we just stepped over insertion/into a deletion prior to the last return from stepForwardOnGenome.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hadIndel() {
|
||||
return ( eventLength > 0 );
|
||||
return (eventLength > 0);
|
||||
}
|
||||
|
||||
public int getEventLength() { return eventLength; }
|
||||
public int getEventLength() {
|
||||
return eventLength;
|
||||
}
|
||||
|
||||
public byte[] getEventBases() { return insertedBases; }
|
||||
public byte[] getEventBases() {
|
||||
return insertedBases;
|
||||
}
|
||||
|
||||
public int getReadEventStartOffset() { return eventStart; }
|
||||
public int getReadEventStartOffset() {
|
||||
return eventStart;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
||||
|
|
@ -160,9 +181,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
|
||||
|
||||
|
||||
if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) {
|
||||
if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
|
||||
cigarOffset++;
|
||||
if ( cigarOffset < nCigarElements ) {
|
||||
if (cigarOffset < nCigarElements) {
|
||||
curElement = cigar.getCigarElement(cigarOffset);
|
||||
cigarElementCounter = 0;
|
||||
// next line: guards against cigar elements of length 0; when new cigar element is retrieved,
|
||||
|
|
@ -174,15 +195,15 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// current offset of this read is the next ref base after the end of the indel. This position will
|
||||
// model a point on the reference somewhere after the end of the read.
|
||||
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
|
||||
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
||||
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
||||
|
||||
if ( generateExtendedEvents && eventDelayedFlag > 0 ) {
|
||||
if (generateExtendedEvents && eventDelayedFlag > 0) {
|
||||
|
||||
// if we had an indel right before the read ended (i.e. insertion was the last cigar element),
|
||||
// we keep it until next reference base; then we discard it and this will allow the LocusIterator to
|
||||
// finally discard this read
|
||||
eventDelayedFlag--;
|
||||
if ( eventDelayedFlag == 0 ) {
|
||||
if (eventDelayedFlag == 0) {
|
||||
eventLength = -1; // reset event when we are past it
|
||||
insertedBases = null;
|
||||
eventStart = -1;
|
||||
|
|
@ -193,34 +214,35 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
boolean done = false;
|
||||
switch (curElement.getOperator()) {
|
||||
case H : // ignore hard clips
|
||||
case P : // ignore pads
|
||||
case H: // ignore hard clips
|
||||
case P: // ignore pads
|
||||
cigarElementCounter = curElement.getLength();
|
||||
break;
|
||||
case I : // insertion w.r.t. the reference
|
||||
if ( generateExtendedEvents ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if (generateExtendedEvents) {
|
||||
// we see insertions only once, when we step right onto them; the position on the read is scrolled
|
||||
// past the insertion right after that
|
||||
if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBAM(read, "Adjacent I/D events in read "+read.getReadName());
|
||||
insertedBases = Arrays.copyOfRange(read.getReadBases(),readOffset+1,readOffset+1+curElement.getLength());
|
||||
eventLength = curElement.getLength() ;
|
||||
if (eventDelayedFlag > 1)
|
||||
throw new UserException.MalformedBAM(read, "Adjacent I/D events in read " + read.getReadName());
|
||||
insertedBases = Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + curElement.getLength());
|
||||
eventLength = curElement.getLength();
|
||||
eventStart = readOffset;
|
||||
eventDelayedFlag = 2; // insertion causes re-entry into stepForwardOnGenome, so we set the delay to 2
|
||||
// System.out.println("Inserted "+(new String (insertedBases)) +" after "+readOffset);
|
||||
} // continue onto the 'S' case !
|
||||
case S : // soft clip
|
||||
case S: // soft clip
|
||||
cigarElementCounter = curElement.getLength();
|
||||
readOffset += curElement.getLength();
|
||||
break;
|
||||
case D : // deletion w.r.t. the reference
|
||||
if ( generateExtendedEvents ) {
|
||||
if ( cigarElementCounter == 1) {
|
||||
case D: // deletion w.r.t. the reference
|
||||
if (generateExtendedEvents) {
|
||||
if (cigarElementCounter == 1) {
|
||||
// generate an extended event only if we just stepped into the deletion (i.e. don't
|
||||
// generate the event at every deleted position on the ref, that's what cigarElementCounter==1 is for!)
|
||||
if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBAM(read, "Adjacent I/D events in read "+read.getReadName());
|
||||
if (eventDelayedFlag > 1)
|
||||
throw new UserException.MalformedBAM(read, "Adjacent I/D events in read " + read.getReadName());
|
||||
eventLength = curElement.getLength();
|
||||
eventDelayedFlag = 2; // deletion on the ref causes an immediate return, so we have to delay by 1 only
|
||||
eventStart = readOffset;
|
||||
|
|
@ -232,26 +254,27 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case M :
|
||||
case M:
|
||||
readOffset++;
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||
default:
|
||||
throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
if ( generateExtendedEvents ) {
|
||||
if ( eventDelayedFlag > 0 && done ) {
|
||||
// if we did make a successful step on the ref, decrement delayed flag. If, upon the decrementthe,
|
||||
if (generateExtendedEvents) {
|
||||
if (eventDelayedFlag > 0 && done) {
|
||||
// if we did make a successful step on the ref, decrement delayed flag. If, upon the decrementing the,
|
||||
// the flag is 1, we are standing on the reference base right after the indel (so we have to keep it).
|
||||
// Otherwise, we are away from the previous indel and have to clear our memories...
|
||||
eventDelayedFlag--; // when we notice an indel, we set delayed flag to 2, so now
|
||||
// if eventDelayedFlag == 1, an indel occured right before the current base
|
||||
if ( eventDelayedFlag == 0 ) {
|
||||
// if eventDelayedFlag == 1, an indel occured right before the current base
|
||||
if (eventDelayedFlag == 0) {
|
||||
eventLength = -1; // reset event when we are past it
|
||||
insertedBases = null;
|
||||
eventStart = -1;
|
||||
|
|
@ -274,15 +297,15 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
|
||||
this.readInfo = readInformation;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
|
||||
this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod());
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
|
||||
if (this.samples.isEmpty() && samIterator.hasNext()) {
|
||||
throw new IllegalArgumentException("samples list must not be empty");
|
||||
}
|
||||
}
|
||||
|
|
@ -322,7 +345,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// -----------------------------------------------------------------------------------------------------------------
|
||||
public AlignmentContext next() {
|
||||
lazyLoadNextAlignmentContext();
|
||||
if(!hasNext())
|
||||
if (!hasNext())
|
||||
throw new NoSuchElementException("LocusIteratorByState: out of elements.");
|
||||
AlignmentContext currentAlignmentContext = nextAlignmentContext;
|
||||
nextAlignmentContext = null;
|
||||
|
|
@ -334,7 +357,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
|
||||
*/
|
||||
private void lazyLoadNextAlignmentContext() {
|
||||
while(nextAlignmentContext == null && readStates.hasNext()) {
|
||||
while (nextAlignmentContext == null && readStates.hasNext()) {
|
||||
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
||||
readStates.collectPendingReads();
|
||||
|
||||
|
|
@ -350,14 +373,14 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
||||
// and shift the position.
|
||||
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
||||
Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
|
||||
Map<String, ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String, ReadBackedExtendedEventPileupImpl>();
|
||||
|
||||
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
||||
// are associated with the *previous* reference base
|
||||
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
|
||||
GenomeLoc loc = genomeLocParser.incPos(getLocation(), -1);
|
||||
|
||||
boolean hasBeenSampled = false;
|
||||
for(final String sample: samples) {
|
||||
for (final String sample : samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
|
@ -368,103 +391,108 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
nMQ0Reads = 0;
|
||||
int maxDeletionLength = 0;
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecordState state = iterator.next();
|
||||
if ( state.hadIndel() ) {
|
||||
while (iterator.hasNext()) {
|
||||
final SAMRecordState state = iterator.next();
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
||||
final int eventLength = state.getEventLength();
|
||||
|
||||
// if (op != CigarOperator.N) // N's are never added to any pileup
|
||||
// continue;
|
||||
//
|
||||
if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref
|
||||
size++;
|
||||
if ( state.getEventBases() == null ) {
|
||||
ExtendedEventPileupElement pileupElement;
|
||||
if (state.getEventBases() == null) { // Deletion event
|
||||
nDeletions++;
|
||||
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
|
||||
maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength());
|
||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength);
|
||||
}
|
||||
else { // Insertion event
|
||||
nInsertions++;
|
||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases());
|
||||
}
|
||||
else nInsertions++;
|
||||
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) );
|
||||
|
||||
} else {
|
||||
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
|
||||
// and add in extra reads that start after this indel. Skip these reads.
|
||||
// My belief at this moment after empirically looking at read->ref alignment is that, in a cigar string
|
||||
// like 1I76M, the first insertion is between alignment start-1 and alignment start, so we shouldn't be
|
||||
// filtering these out.
|
||||
// TODO: UPDATE! Eric tells me that we *might* want reads adjacent to the pileup in the pileup. Strike this block.
|
||||
//if(state.getRead().getAlignmentStart() > loc.getStart())
|
||||
// continue;
|
||||
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
// this read has no indel associated with the previous position on the ref;
|
||||
// we count this read in only if it has actual bases, not N span...
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) {
|
||||
|
||||
// if cigar operator is D but the read has no extended event reported (that's why we ended
|
||||
// up in this branch), it means that we are currently inside a deletion that started earlier;
|
||||
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
|
||||
// about to report) only if includeReadsWithDeletionAtLoci is true.
|
||||
size++;
|
||||
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent
|
||||
);
|
||||
}
|
||||
}
|
||||
indelPile.add(pileupElement);
|
||||
}
|
||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||
|
||||
// this read has no indel associated with the previous position on the ref. Criteria to include in the pileup are:
|
||||
// we only add reads that are not N's
|
||||
// we only include deletions to the pileup if the walker requests it
|
||||
else if ( (op != CigarOperator.N) && (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci())) {
|
||||
size++;
|
||||
indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset));
|
||||
}
|
||||
|
||||
|
||||
if (state.getRead().getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sample,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads));
|
||||
}
|
||||
hasExtendedEvents = false; // we are done with extended events prior to current ref base
|
||||
// System.out.println("Indel(s) at "+loc);
|
||||
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
|
||||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
|
||||
} else {
|
||||
GenomeLoc location = getLocation();
|
||||
Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
|
||||
|
||||
}
|
||||
|
||||
if (indelPile.size() != 0)
|
||||
fullExtendedEventPileup.put(sample, new ReadBackedExtendedEventPileupImpl(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
||||
}
|
||||
hasExtendedEvents = false; // we are done with extended events prior to current ref base
|
||||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
|
||||
}
|
||||
else { // this is a regular event pileup (not extended)
|
||||
GenomeLoc location = getLocation();
|
||||
Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
|
||||
boolean hasBeenSampled = false;
|
||||
for(final String sample: samples) {
|
||||
for (final String sample : samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nMQ0Reads = 0;
|
||||
size = 0; // number of elements in this sample's pileup
|
||||
nDeletions = 0; // number of deletions in this sample's pileup
|
||||
nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecordState state = iterator.next();
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
if ( filterBaseInRead((GATKSAMRecord) state.getRead(), location.getStart()) ) {
|
||||
//discarded_bases++;
|
||||
//printStatus("Adaptor bases", discarded_adaptor_bases);
|
||||
continue;
|
||||
} else {
|
||||
//observed_bases++;
|
||||
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()));
|
||||
while (iterator.hasNext()) {
|
||||
final SAMRecordState state = iterator.next(); // state object with the read/offset information
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
||||
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
|
||||
if (op == CigarOperator.D) {
|
||||
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
|
||||
int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset;
|
||||
pile.add(new PileupElement(read, leftAlignedStart, true));
|
||||
size++;
|
||||
nDeletions++;
|
||||
}
|
||||
} else {
|
||||
if (!filterBaseInRead(read, location.getStart())) {
|
||||
pile.add(new PileupElement(read, readOffset, false));
|
||||
size++;
|
||||
}
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
size++;
|
||||
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1));
|
||||
nDeletions++;
|
||||
}
|
||||
|
||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
|
||||
if( pile.size() != 0 )
|
||||
fullPileup.put(sample,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads));
|
||||
if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
|
||||
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
|
||||
}
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
// if we got reads with non-D/N over the current position, we are done
|
||||
if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location,fullPileup),hasBeenSampled);
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
||||
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fast testing of position
|
||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||
if ( readStates.isEmpty() )
|
||||
if (readStates.isEmpty())
|
||||
return false;
|
||||
else {
|
||||
SAMRecordState state = readStates.getFirst();
|
||||
|
|
@ -485,20 +513,18 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
private void updateReadStates() {
|
||||
for(final String sample: samples) {
|
||||
for (final String sample : samples) {
|
||||
Iterator<SAMRecordState> it = readStates.iterator(sample);
|
||||
while ( it.hasNext() ) {
|
||||
while (it.hasNext()) {
|
||||
SAMRecordState state = it.next();
|
||||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true;
|
||||
else {
|
||||
if (state.hadIndel() && readInfo.generateExtendedEvents())
|
||||
hasExtendedEvents = true;
|
||||
else if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
if ( op == null ) { // we've stepped off the end of the object
|
||||
//if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart()));
|
||||
it.remove();
|
||||
}
|
||||
it.remove(); // we've stepped off the end of the object
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -508,20 +534,20 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
private class ReadStateManager {
|
||||
private class ReadStateManager {
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
private final int targetCoverage;
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod) {
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
|
||||
switch(this.downsamplingMethod.type) {
|
||||
switch (this.downsamplingMethod.type) {
|
||||
case BY_SAMPLE:
|
||||
if(downsamplingMethod.toCoverage == null)
|
||||
if (downsamplingMethod.toCoverage == null)
|
||||
throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample");
|
||||
this.targetCoverage = downsamplingMethod.toCoverage;
|
||||
break;
|
||||
|
|
@ -529,10 +555,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
this.targetCoverage = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
|
||||
for(final String sample: samples) {
|
||||
readStatesBySample.put(sample,new PerSampleReadStateManager());
|
||||
readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
|
||||
Map<String, ReadSelector> readSelectors = new HashMap<String, ReadSelector>();
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager());
|
||||
readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector());
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(readSelectors);
|
||||
|
|
@ -541,6 +567,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
/**
|
||||
* Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented
|
||||
* for this iterator; if present, total read states will be decremented.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Iterator over the reads associated with that sample.
|
||||
*/
|
||||
|
|
@ -569,6 +596,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager across all samples.
|
||||
*
|
||||
* @return Total number of reads over all samples.
|
||||
*/
|
||||
public int size() {
|
||||
|
|
@ -577,6 +605,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager in the given sample.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Total number of reads in the given sample.
|
||||
*/
|
||||
|
|
@ -587,6 +616,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
/**
|
||||
* The extent of downsampling; basically, the furthest base out which has 'fallen
|
||||
* victim' to the downsampler.
|
||||
*
|
||||
* @param sample Sample, downsampled independently.
|
||||
* @return Integer stop of the furthest undownsampled region.
|
||||
*/
|
||||
|
|
@ -595,9 +625,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
for(final String sample: samples) {
|
||||
for (final String sample : samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
if(!reads.isEmpty())
|
||||
if (!reads.isEmpty())
|
||||
return reads.peek();
|
||||
}
|
||||
return null;
|
||||
|
|
@ -608,19 +638,18 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
public void collectPendingReads() {
|
||||
if(!iterator.hasNext())
|
||||
if (!iterator.hasNext())
|
||||
return;
|
||||
|
||||
if(readStates.size() == 0) {
|
||||
if (readStates.size() == 0) {
|
||||
int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||
while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
samplePartitioner.submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// Fast fail in the case that the read is past the current position.
|
||||
if(readIsPastCurrentPosition(iterator.peek()))
|
||||
if (readIsPastCurrentPosition(iterator.peek()))
|
||||
return;
|
||||
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
|
|
@ -629,7 +658,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
samplePartitioner.complete();
|
||||
|
||||
for(final String sample: samples) {
|
||||
for (final String sample : samples) {
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
|
||||
|
||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
|
||||
|
|
@ -638,21 +667,20 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
int numReads = statesBySample.size();
|
||||
int downsamplingExtent = aggregator.getDownsamplingExtent();
|
||||
|
||||
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) {
|
||||
if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) {
|
||||
long readLimit = aggregator.getNumReadsSeen();
|
||||
addReadsToSample(statesBySample,newReads,readLimit);
|
||||
addReadsToSample(statesBySample, newReads, readLimit);
|
||||
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
int[] counts = statesBySample.getCountsPerAlignmentStart();
|
||||
int[] updatedCounts = new int[counts.length];
|
||||
System.arraycopy(counts,0,updatedCounts,0,counts.length);
|
||||
System.arraycopy(counts, 0, updatedCounts, 0, counts.length);
|
||||
|
||||
boolean readPruned = true;
|
||||
while(numReads+newReads.size()>targetCoverage && readPruned) {
|
||||
while (numReads + newReads.size() > targetCoverage && readPruned) {
|
||||
readPruned = false;
|
||||
for(int alignmentStart=updatedCounts.length-1;numReads+newReads.size()>targetCoverage&&alignmentStart>=0;alignmentStart--) {
|
||||
if(updatedCounts[alignmentStart] > 1) {
|
||||
for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) {
|
||||
if (updatedCounts[alignmentStart] > 1) {
|
||||
updatedCounts[alignmentStart]--;
|
||||
numReads--;
|
||||
readPruned = true;
|
||||
|
|
@ -660,7 +688,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
if(numReads == targetCoverage) {
|
||||
if (numReads == targetCoverage) {
|
||||
updatedCounts[0]--;
|
||||
numReads--;
|
||||
}
|
||||
|
|
@ -668,18 +696,18 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
BitSet toPurge = new BitSet(readStates.size());
|
||||
int readOffset = 0;
|
||||
|
||||
for(int i = 0; i < updatedCounts.length; i++) {
|
||||
for (int i = 0; i < updatedCounts.length; i++) {
|
||||
int n = counts[i];
|
||||
int k = updatedCounts[i];
|
||||
|
||||
for(Integer purgedElement: MathUtils.sampleIndicesWithoutReplacement(n,n-k))
|
||||
toPurge.set(readOffset+purgedElement);
|
||||
for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k))
|
||||
toPurge.set(readOffset + purgedElement);
|
||||
|
||||
readOffset += counts[i];
|
||||
}
|
||||
downsamplingExtent = Math.max(downsamplingExtent,statesBySample.purge(toPurge));
|
||||
|
||||
addReadsToSample(statesBySample,newReads,targetCoverage-numReads);
|
||||
downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge));
|
||||
|
||||
addReadsToSample(statesBySample, newReads, targetCoverage - numReads);
|
||||
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
||||
}
|
||||
}
|
||||
|
|
@ -688,23 +716,25 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
/**
|
||||
* Add reads with the given sample name to the given hanger entry.
|
||||
*
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
* @param maxReads Maximum number of reads to add.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
* @param maxReads Maximum number of reads to add.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
|
||||
if(reads.isEmpty())
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
|
||||
int readCount = 0;
|
||||
for(SAMRecord read: reads) {
|
||||
if(readCount < maxReads) {
|
||||
for (SAMRecord read : reads) {
|
||||
if (readCount < maxReads) {
|
||||
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
// TODO: What if we downsample the extended events away?
|
||||
if (state.hadIndel()) hasExtendedEvents = true;
|
||||
if (state.hadIndel())
|
||||
hasExtendedEvents = true;
|
||||
readCount++;
|
||||
}
|
||||
}
|
||||
|
|
@ -735,7 +765,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
public void specifyNewDownsamplingExtent(int downsamplingExtent) {
|
||||
this.downsamplingExtent = Math.max(this.downsamplingExtent,downsamplingExtent);
|
||||
this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent);
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
|
|
@ -745,7 +775,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
public int[] getCountsPerAlignmentStart() {
|
||||
int[] counts = new int[readStateCounter.size()];
|
||||
int index = 0;
|
||||
for(Counter counter: readStateCounter)
|
||||
for (Counter counter : readStateCounter)
|
||||
counts[index++] = counter.getCount();
|
||||
return counts;
|
||||
}
|
||||
|
|
@ -766,7 +796,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
wrappedIterator.remove();
|
||||
Counter counter = readStateCounter.peek();
|
||||
counter.decrement();
|
||||
if(counter.getCount() == 0)
|
||||
if (counter.getCount() == 0)
|
||||
readStateCounter.remove();
|
||||
}
|
||||
};
|
||||
|
|
@ -775,13 +805,14 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
/**
|
||||
* Purge the given elements from the bitset. If an element in the bitset is true, purge
|
||||
* the corresponding read state.
|
||||
*
|
||||
* @param elements bits from the set to purge.
|
||||
* @return the extent of the final downsampled read.
|
||||
*/
|
||||
public int purge(final BitSet elements) {
|
||||
int downsamplingExtent = 0;
|
||||
|
||||
if(elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
|
||||
if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
|
||||
|
||||
Iterator<SAMRecordState> readStateIterator = readStates.iterator();
|
||||
|
||||
|
|
@ -794,22 +825,22 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
int toPurge = elements.nextSetBit(0);
|
||||
int removedCount = 0;
|
||||
|
||||
while(readStateIterator.hasNext() && toPurge >= 0) {
|
||||
while (readStateIterator.hasNext() && toPurge >= 0) {
|
||||
SAMRecordState state = readStateIterator.next();
|
||||
downsamplingExtent = Math.max(downsamplingExtent,state.getRead().getAlignmentEnd());
|
||||
downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd());
|
||||
|
||||
if(readIndex == toPurge) {
|
||||
if (readIndex == toPurge) {
|
||||
readStateIterator.remove();
|
||||
currentCounter.decrement();
|
||||
if(currentCounter.getCount() == 0)
|
||||
if (currentCounter.getCount() == 0)
|
||||
counterIterator.remove();
|
||||
removedCount++;
|
||||
toPurge = elements.nextSetBit(toPurge+1);
|
||||
toPurge = elements.nextSetBit(toPurge + 1);
|
||||
}
|
||||
|
||||
readIndex++;
|
||||
alignmentStartCounter--;
|
||||
if(alignmentStartCounter == 0 && counterIterator.hasNext()) {
|
||||
if (alignmentStartCounter == 0 && counterIterator.hasNext()) {
|
||||
currentCounter = counterIterator.next();
|
||||
alignmentStartCounter = currentCounter.getCount();
|
||||
}
|
||||
|
|
@ -849,12 +880,14 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
interface ReadSelector {
|
||||
/**
|
||||
* All previous selectors in the chain have allowed this read. Submit it to this selector for consideration.
|
||||
*
|
||||
* @param read the read to evaluate.
|
||||
*/
|
||||
public void submitRead(SAMRecord read);
|
||||
|
||||
/**
|
||||
* A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid.
|
||||
*
|
||||
* @param read the read previously rejected.
|
||||
*/
|
||||
public void notifyReadRejected(SAMRecord read);
|
||||
|
|
@ -866,12 +899,14 @@ interface ReadSelector {
|
|||
|
||||
/**
|
||||
* Retrieve the number of reads seen by this selector so far.
|
||||
*
|
||||
* @return number of reads seen.
|
||||
*/
|
||||
public long getNumReadsSeen();
|
||||
|
||||
/**
|
||||
* Return the number of reads accepted by this selector so far.
|
||||
*
|
||||
* @return number of reads selected.
|
||||
*/
|
||||
public long getNumReadsSelected();
|
||||
|
|
@ -880,12 +915,14 @@ interface ReadSelector {
|
|||
* Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the
|
||||
* last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
|
||||
* position 3 whose cigar string is 76M, the value of this parameter will be 78.
|
||||
*
|
||||
* @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0.
|
||||
*/
|
||||
public int getDownsamplingExtent();
|
||||
|
||||
/**
|
||||
* Get the reads selected by this selector.
|
||||
*
|
||||
* @return collection of reads selected by this selector.
|
||||
*/
|
||||
public Collection<SAMRecord> getSelectedReads();
|
||||
|
|
@ -911,7 +948,7 @@ class AllReadsSelector implements ReadSelector {
|
|||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
readsSeen++;
|
||||
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
|
||||
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
|
|
@ -949,18 +986,18 @@ class NRandomReadSelector implements ReadSelector {
|
|||
private final ReservoirDownsampler<SAMRecord> reservoir;
|
||||
private final ReadSelector chainedSelector;
|
||||
private long readsSeen = 0;
|
||||
private int downsamplingExtent = 0;
|
||||
private int downsamplingExtent = 0;
|
||||
|
||||
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
|
||||
this.reservoir = new ReservoirDownsampler<SAMRecord>((int)readLimit);
|
||||
this.reservoir = new ReservoirDownsampler<SAMRecord>((int) readLimit);
|
||||
this.chainedSelector = chainedSelector;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
SAMRecord displaced = reservoir.add(read);
|
||||
if(displaced != null && chainedSelector != null) {
|
||||
if (displaced != null && chainedSelector != null) {
|
||||
chainedSelector.notifyReadRejected(read);
|
||||
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
|
||||
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
|
||||
}
|
||||
readsSeen++;
|
||||
}
|
||||
|
|
@ -970,9 +1007,9 @@ class NRandomReadSelector implements ReadSelector {
|
|||
}
|
||||
|
||||
public void complete() {
|
||||
for(SAMRecord read: reservoir.getDownsampledContents())
|
||||
for (SAMRecord read : reservoir.getDownsampledContents())
|
||||
chainedSelector.submitRead(read);
|
||||
if(chainedSelector != null)
|
||||
if (chainedSelector != null)
|
||||
chainedSelector.complete();
|
||||
}
|
||||
|
||||
|
|
@ -987,7 +1024,7 @@ class NRandomReadSelector implements ReadSelector {
|
|||
|
||||
public int getDownsamplingExtent() {
|
||||
return downsamplingExtent;
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return reservoir.getDownsampledContents();
|
||||
|
|
@ -996,7 +1033,7 @@ class NRandomReadSelector implements ReadSelector {
|
|||
public void reset() {
|
||||
reservoir.clear();
|
||||
downsamplingExtent = 0;
|
||||
if(chainedSelector != null)
|
||||
if (chainedSelector != null)
|
||||
chainedSelector.reset();
|
||||
}
|
||||
}
|
||||
|
|
@ -1005,23 +1042,23 @@ class NRandomReadSelector implements ReadSelector {
|
|||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
class SamplePartitioner implements ReadSelector {
|
||||
private final Map<String,ReadSelector> readsBySample;
|
||||
private final Map<String, ReadSelector> readsBySample;
|
||||
private long readsSeen = 0;
|
||||
|
||||
public SamplePartitioner(Map<String,ReadSelector> readSelectors) {
|
||||
public SamplePartitioner(Map<String, ReadSelector> readSelectors) {
|
||||
readsBySample = readSelectors;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null;
|
||||
if(readsBySample.containsKey(sampleName))
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submitRead(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null;
|
||||
if(readsBySample.containsKey(sampleName))
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).notifyReadRejected(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
|
@ -1040,23 +1077,23 @@ class SamplePartitioner implements ReadSelector {
|
|||
|
||||
public int getDownsamplingExtent() {
|
||||
int downsamplingExtent = 0;
|
||||
for(ReadSelector storage: readsBySample.values())
|
||||
downsamplingExtent = Math.max(downsamplingExtent,storage.getDownsamplingExtent());
|
||||
for (ReadSelector storage : readsBySample.values())
|
||||
downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent());
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
|
||||
}
|
||||
|
||||
public ReadSelector getSelectedReads(String sampleName) {
|
||||
if(!readsBySample.containsKey(sampleName))
|
||||
if (!readsBySample.containsKey(sampleName))
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
return readsBySample.get(sampleName);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for(ReadSelector storage: readsBySample.values())
|
||||
for (ReadSelector storage : readsBySample.values())
|
||||
storage.reset();
|
||||
readsSeen = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,13 +25,13 @@ public class BaseQualityRankSumTest extends RankSumTest {
|
|||
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if( isUsableBase(p) ) {
|
||||
if ( p.getBase() == ref ) {
|
||||
if ( p.getBase() == ref )
|
||||
refQuals.add((double)p.getQual());
|
||||
} else if ( p.getBase() == alt ) {
|
||||
else if ( p.getBase() == alt )
|
||||
altQuals.add((double)p.getQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
|
|
@ -57,8 +57,6 @@ public class BaseQualityRankSumTest extends RankSumTest {
|
|||
refQuals.add(-10.0*refLikelihood);
|
||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
altQuals.add(-10.0*altLikelihood);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -205,7 +205,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
for (PileupElement p : sample.getValue().getBasePileup()) {
|
||||
if ( p.isDeletion() || p.isReducedRead() ) // ignore deletions and reduced reads
|
||||
if ( p.isDeletion() || p.getRead().isReducedRead() ) // ignore deletions and reduced reads
|
||||
continue;
|
||||
|
||||
if ( p.getRead().getMappingQuality() < 20 || p.getQual() < 20 )
|
||||
|
|
@ -258,7 +258,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
continue;
|
||||
|
||||
for (final PileupElement p: pileup) {
|
||||
if ( p.isReducedRead() ) // ignore reduced reads
|
||||
if ( p.getRead().isReducedRead() ) // ignore reduced reads
|
||||
continue;
|
||||
if ( p.getRead().getMappingQuality() < 20)
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -62,15 +62,15 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
private final static char REGEXP_WILDCARD = '.';
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
||||
return null;
|
||||
|
||||
if (vc.isSNP() && !vc.isBiallelic())
|
||||
if (vc.isSNP() && !vc.isBiallelic())
|
||||
return null;
|
||||
|
||||
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
|
||||
|
||||
final int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE);
|
||||
final int contextWingSize = Math.min(((int) ref.getWindow().size() - 1) / 2, MIN_CONTEXT_WING_SIZE);
|
||||
final int contextSize = contextWingSize * 2 + 1;
|
||||
|
||||
final int locus = ref.getLocus().getStart() + (ref.getLocus().getStop() - ref.getLocus().getStart()) / 2;
|
||||
|
|
@ -84,14 +84,14 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
if (pileup == null)
|
||||
return null;
|
||||
|
||||
|
||||
final List<Haplotype> haplotypes = computeHaplotypes(pileup, contextSize, locus, vc);
|
||||
|
||||
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
|
||||
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
|
||||
if (haplotypes != null) {
|
||||
for ( final Genotype genotype : vc.getGenotypes()) {
|
||||
for (final Genotype genotype : vc.getGenotypes()) {
|
||||
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( thisContext != null ) {
|
||||
if (thisContext != null) {
|
||||
final ReadBackedPileup thisPileup;
|
||||
if (thisContext.hasExtendedEventPileup())
|
||||
thisPileup = thisContext.getExtendedEventPileup();
|
||||
|
|
@ -102,14 +102,13 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
if (thisPileup != null) {
|
||||
if (vc.isSNP())
|
||||
scoreRA.add( scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus) ); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
else if (vc.isIndel() || vc.isMixed()) {
|
||||
Double d = scoreIndelsAgainstHaplotypes(thisPileup);
|
||||
if (d == null)
|
||||
return null;
|
||||
scoreRA.add( d ); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
else
|
||||
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -122,12 +121,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
return map;
|
||||
}
|
||||
|
||||
private class HaplotypeComparator implements Comparator<Haplotype>{
|
||||
private class HaplotypeComparator implements Comparator<Haplotype> {
|
||||
|
||||
public int compare(Haplotype a, Haplotype b) {
|
||||
if (a.getQualitySum() < b.getQualitySum())
|
||||
return 1;
|
||||
if (a.getQualitySum() > b.getQualitySum()){
|
||||
if (a.getQualitySum() > b.getQualitySum()) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
|
|
@ -137,39 +136,38 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
private List<Haplotype> computeHaplotypes(final ReadBackedPileup pileup, final int contextSize, final int locus, final VariantContext vc) {
|
||||
// Compute all possible haplotypes consistent with current pileup
|
||||
|
||||
int haplotypesToCompute = vc.getAlternateAlleles().size()+1;
|
||||
int haplotypesToCompute = vc.getAlternateAlleles().size() + 1;
|
||||
|
||||
final PriorityQueue<Haplotype> candidateHaplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
||||
final PriorityQueue<Haplotype> consensusHaplotypeQueue = new PriorityQueue<Haplotype>(MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER, new HaplotypeComparator());
|
||||
|
||||
for ( final PileupElement p : pileup ) {
|
||||
for (final PileupElement p : pileup) {
|
||||
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
|
||||
candidateHaplotypeQueue.add(haplotypeFromRead);
|
||||
}
|
||||
|
||||
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
||||
Haplotype elem;
|
||||
while ((elem = candidateHaplotypeQueue.poll()) != null) {
|
||||
while ((elem = candidateHaplotypeQueue.poll()) != null) {
|
||||
boolean foundHaplotypeMatch = false;
|
||||
Haplotype lastCheckedHaplotype = null;
|
||||
for ( final Haplotype haplotypeFromList : consensusHaplotypeQueue ) {
|
||||
for (final Haplotype haplotypeFromList : consensusHaplotypeQueue) {
|
||||
final Haplotype consensusHaplotype = getConsensusHaplotype(elem, haplotypeFromList);
|
||||
if (consensusHaplotype != null) {
|
||||
if (consensusHaplotype != null) {
|
||||
foundHaplotypeMatch = true;
|
||||
if (consensusHaplotype.getQualitySum() > haplotypeFromList.getQualitySum()) {
|
||||
consensusHaplotypeQueue.remove(haplotypeFromList);
|
||||
consensusHaplotypeQueue.add(consensusHaplotype);
|
||||
}
|
||||
break;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
lastCheckedHaplotype = haplotypeFromList;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundHaplotypeMatch && consensusHaplotypeQueue.size() < MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER) {
|
||||
consensusHaplotypeQueue.add(elem);
|
||||
} else if (!foundHaplotypeMatch && lastCheckedHaplotype != null && elem.getQualitySum() > lastCheckedHaplotype.getQualitySum() ) {
|
||||
} else if (!foundHaplotypeMatch && lastCheckedHaplotype != null && elem.getQualitySum() > lastCheckedHaplotype.getQualitySum()) {
|
||||
consensusHaplotypeQueue.remove(lastCheckedHaplotype);
|
||||
consensusHaplotypeQueue.add(elem);
|
||||
}
|
||||
|
|
@ -180,12 +178,14 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
// The consensus haplotypes are in a quality-ordered priority queue, so the best haplotypes are just the ones at the front of the queue
|
||||
final Haplotype haplotype1 = consensusHaplotypeQueue.poll();
|
||||
|
||||
List<Haplotype>hlist = new ArrayList<Haplotype>();
|
||||
List<Haplotype> hlist = new ArrayList<Haplotype>();
|
||||
hlist.add(new Haplotype(haplotype1.getBases(), 60));
|
||||
|
||||
for (int k=1; k < haplotypesToCompute; k++) {
|
||||
for (int k = 1; k < haplotypesToCompute; k++) {
|
||||
Haplotype haplotype2 = consensusHaplotypeQueue.poll();
|
||||
if(haplotype2 == null ) { haplotype2 = haplotype1; } // Sometimes only the reference haplotype can be found
|
||||
if (haplotype2 == null) {
|
||||
haplotype2 = haplotype1;
|
||||
} // Sometimes only the reference haplotype can be found
|
||||
hlist.add(new Haplotype(haplotype2.getBases(), 20));
|
||||
}
|
||||
return hlist;
|
||||
|
|
@ -194,36 +194,43 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
}
|
||||
|
||||
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
||||
final SAMRecord read = p.getRead();
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
int readOffsetFromPileup = p.getOffset();
|
||||
|
||||
final byte[] haplotypeBases = new byte[contextSize];
|
||||
Arrays.fill(haplotypeBases, (byte)REGEXP_WILDCARD);
|
||||
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
|
||||
final double[] baseQualities = new double[contextSize];
|
||||
Arrays.fill(baseQualities, 0.0);
|
||||
|
||||
byte[] readBases = read.getReadBases();
|
||||
readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string
|
||||
readBases = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readBases); // Adjust the read bases based on the Cigar string
|
||||
byte[] readQuals = read.getBaseQualities();
|
||||
readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
||||
readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
||||
|
||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), readOffsetFromPileup, p.getRead().getAlignmentStart(), locus);
|
||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus);
|
||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
||||
|
||||
for (int i = 0; i < contextSize; i++ ) {
|
||||
for (int i = 0; i < contextSize; i++) {
|
||||
final int baseOffset = i + baseOffsetStart;
|
||||
if ( baseOffset < 0 ) {
|
||||
if (baseOffset < 0) {
|
||||
continue;
|
||||
}
|
||||
if ( baseOffset >= readBases.length ) {
|
||||
if (baseOffset >= readBases.length) {
|
||||
break;
|
||||
}
|
||||
if( readQuals[baseOffset] == PileupElement.DELETION_BASE) { readQuals[baseOffset] = PileupElement.DELETION_QUAL; }
|
||||
if( !BaseUtils.isRegularBase(readBases[baseOffset]) ) { readBases[baseOffset] = (byte)REGEXP_WILDCARD; readQuals[baseOffset] = (byte) 0; } // N's shouldn't be treated as distinct bases
|
||||
readQuals[baseOffset] = (byte)Math.min((int)readQuals[baseOffset], p.getMappingQual());
|
||||
if( ((int)readQuals[baseOffset]) < 5 ) { readQuals[baseOffset] = (byte) 0; } // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them
|
||||
if (readQuals[baseOffset] == PileupElement.DELETION_BASE) {
|
||||
readQuals[baseOffset] = PileupElement.DELETION_QUAL;
|
||||
}
|
||||
if (!BaseUtils.isRegularBase(readBases[baseOffset])) {
|
||||
readBases[baseOffset] = (byte) REGEXP_WILDCARD;
|
||||
readQuals[baseOffset] = (byte) 0;
|
||||
} // N's shouldn't be treated as distinct bases
|
||||
readQuals[baseOffset] = (byte) Math.min((int) readQuals[baseOffset], p.getMappingQual());
|
||||
if (((int) readQuals[baseOffset]) < 5) {
|
||||
readQuals[baseOffset] = (byte) 0;
|
||||
} // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them
|
||||
haplotypeBases[i] = readBases[baseOffset];
|
||||
baseQualities[i] = (double)readQuals[baseOffset];
|
||||
baseQualities[i] = (double) readQuals[baseOffset];
|
||||
}
|
||||
|
||||
return new Haplotype(haplotypeBases, baseQualities);
|
||||
|
|
@ -238,7 +245,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
}
|
||||
|
||||
byte chA, chB;
|
||||
final byte wc = (byte)REGEXP_WILDCARD;
|
||||
final byte wc = (byte) REGEXP_WILDCARD;
|
||||
|
||||
final int length = a.length;
|
||||
final byte[] consensusChars = new byte[length];
|
||||
|
|
@ -247,7 +254,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
final double[] qualsA = haplotypeA.getQuals();
|
||||
final double[] qualsB = haplotypeB.getQuals();
|
||||
|
||||
for (int i=0; i < length; i++) {
|
||||
for (int i = 0; i < length; i++) {
|
||||
chA = a[i];
|
||||
chB = b[i];
|
||||
|
||||
|
|
@ -257,17 +264,15 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
if ((chA == wc) && (chB == wc)) {
|
||||
consensusChars[i] = wc;
|
||||
consensusQuals[i] = 0.0;
|
||||
}
|
||||
else if ((chA == wc)) {
|
||||
} else if ((chA == wc)) {
|
||||
consensusChars[i] = chB;
|
||||
consensusQuals[i] = qualsB[i];
|
||||
}
|
||||
else if ((chB == wc)){
|
||||
} else if ((chB == wc)) {
|
||||
consensusChars[i] = chA;
|
||||
consensusQuals[i] = qualsA[i];
|
||||
} else {
|
||||
consensusChars[i] = chA;
|
||||
consensusQuals[i] = qualsA[i]+qualsB[i];
|
||||
consensusQuals[i] = qualsA[i] + qualsB[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -276,31 +281,33 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||
private double scoreReadsAgainstHaplotypes(final List<Haplotype> haplotypes, final ReadBackedPileup pileup, final int contextSize, final int locus) {
|
||||
if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
||||
if ( DEBUG ) System.out.printf("HAP2: %s%n", haplotypes.get(1));
|
||||
if (DEBUG) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
||||
if (DEBUG) System.out.printf("HAP2: %s%n", haplotypes.get(1));
|
||||
|
||||
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
||||
for ( final PileupElement p : pileup ) {
|
||||
for (final PileupElement p : pileup) {
|
||||
// Score all the reads in the pileup, even the filtered ones
|
||||
final double[] scores = new double[haplotypes.size()];
|
||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
||||
for (int i = 0; i < haplotypes.size(); i++) {
|
||||
final Haplotype haplotype = haplotypes.get(i);
|
||||
final double score = scoreReadAgainstHaplotype(p, contextSize, haplotype, locus);
|
||||
scores[i] = score;
|
||||
if ( DEBUG ) { System.out.printf(" vs. haplotype %d = %f%n", i, score); }
|
||||
if (DEBUG) {
|
||||
System.out.printf(" vs. haplotype %d = %f%n", i, score);
|
||||
}
|
||||
}
|
||||
haplotypeScores.add(scores);
|
||||
}
|
||||
|
||||
double overallScore = 0.0;
|
||||
for ( final double[] readHaplotypeScores : haplotypeScores ) {
|
||||
for (final double[] readHaplotypeScores : haplotypeScores) {
|
||||
overallScore += MathUtils.arrayMin(readHaplotypeScores);
|
||||
}
|
||||
|
||||
return overallScore;
|
||||
}
|
||||
|
||||
private double scoreReadAgainstHaplotype(final PileupElement p, final int contextSize, final Haplotype haplotype, final int locus ) {
|
||||
private double scoreReadAgainstHaplotype(final PileupElement p, final int contextSize, final Haplotype haplotype, final int locus) {
|
||||
double expected = 0.0;
|
||||
double mismatches = 0.0;
|
||||
|
||||
|
|
@ -315,33 +322,35 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
|
||||
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
|
||||
final byte[] haplotypeBases = haplotype.getBases();
|
||||
final SAMRecord read = p.getRead();
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
byte[] readBases = read.getReadBases();
|
||||
|
||||
readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string
|
||||
byte[] readQuals = read.getBaseQualities();
|
||||
readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
||||
int readOffsetFromPileup = p.getOffset();
|
||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), readOffsetFromPileup, p.getRead().getAlignmentStart(), locus);
|
||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
|
||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
||||
|
||||
for ( int i = 0; i < contextSize; i++ ) {
|
||||
for (int i = 0; i < contextSize; i++) {
|
||||
final int baseOffset = i + baseOffsetStart;
|
||||
if ( baseOffset < 0 ) {
|
||||
if (baseOffset < 0) {
|
||||
continue;
|
||||
}
|
||||
if ( baseOffset >= readBases.length ) {
|
||||
if (baseOffset >= readBases.length) {
|
||||
break;
|
||||
}
|
||||
|
||||
final byte haplotypeBase = haplotypeBases[i];
|
||||
final byte readBase = readBases[baseOffset];
|
||||
|
||||
final boolean matched = ( readBase == haplotypeBase || haplotypeBase == (byte)REGEXP_WILDCARD );
|
||||
final boolean matched = (readBase == haplotypeBase || haplotypeBase == (byte) REGEXP_WILDCARD);
|
||||
byte qual = readQuals[baseOffset];
|
||||
if( qual == PileupElement.DELETION_BASE ) { qual = PileupElement.DELETION_QUAL; } // calcAlignmentByteArrayOffset fills the readQuals array with DELETION_BASE at deletions
|
||||
qual = (byte)Math.min((int)qual, p.getMappingQual());
|
||||
if( ((int) qual) >= 5 ) { // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them
|
||||
if (qual == PileupElement.DELETION_BASE) {
|
||||
qual = PileupElement.DELETION_QUAL;
|
||||
} // calcAlignmentByteArrayOffset fills the readQuals array with DELETION_BASE at deletions
|
||||
qual = (byte) Math.min((int) qual, p.getMappingQual());
|
||||
if (((int) qual) >= 5) { // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them
|
||||
final double e = QualityUtils.qualToErrorProb(qual);
|
||||
expected += e;
|
||||
mismatches += matched ? e : 1.0 - e / 3.0;
|
||||
|
|
@ -355,26 +364,27 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
}
|
||||
|
||||
|
||||
|
||||
private Double scoreIndelsAgainstHaplotypes(final ReadBackedPileup pileup) {
|
||||
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
||||
|
||||
final HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
|
||||
if (indelLikelihoodMap== null)
|
||||
if (indelLikelihoodMap == null)
|
||||
return null;
|
||||
|
||||
for (final PileupElement p: pileup) {
|
||||
for (final PileupElement p : pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p);
|
||||
|
||||
// Score all the reads in the pileup, even the filtered ones
|
||||
final double[] scores = new double[el.size()];
|
||||
int i = 0;
|
||||
for (Allele a: el.keySet() ) {
|
||||
for (Allele a : el.keySet()) {
|
||||
scores[i++] = -el.get(a);
|
||||
if ( DEBUG ) { System.out.printf(" vs. haplotype %d = %f%n", i-1, scores[i-1]); }
|
||||
if (DEBUG) {
|
||||
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
haplotypeScores.add(scores);
|
||||
|
|
@ -383,7 +393,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
// indel likelihoods are stric log-probs, not phred scored
|
||||
double overallScore = 0.0;
|
||||
for ( final double[] readHaplotypeScores : haplotypeScores ) {
|
||||
for (final double[] readHaplotypeScores : haplotypeScores) {
|
||||
overallScore += MathUtils.arrayMin(readHaplotypeScores);
|
||||
}
|
||||
|
||||
|
|
@ -392,6 +402,11 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
}
|
||||
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes")); }
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList("HaplotypeScore");
|
||||
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,11 +30,11 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
static final boolean DEBUG = false;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
if (stratifiedContexts.size() == 0)
|
||||
return null;
|
||||
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
if (genotypes == null || genotypes.size() == 0)
|
||||
return null;
|
||||
|
||||
|
||||
|
|
@ -43,19 +43,18 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
|
||||
if (vc.isSNP() && vc.isBiallelic()) {
|
||||
// todo - no current support for multiallelic snps
|
||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||
for (final Genotype genotype : genotypes.iterateInSampleNameOrder()) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null ) {
|
||||
if (context == null) {
|
||||
continue;
|
||||
}
|
||||
fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).getBases()[0], context.getBasePileup(), refQuals, altQuals);
|
||||
}
|
||||
}
|
||||
else if (vc.isIndel() || vc.isMixed()) {
|
||||
} else if (vc.isIndel() || vc.isMixed()) {
|
||||
|
||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||
for (final Genotype genotype : genotypes.iterateInSampleNameOrder()) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null ) {
|
||||
if (context == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -74,46 +73,47 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
|
||||
fillIndelQualsFromPileup(pileup, refQuals, altQuals);
|
||||
}
|
||||
}
|
||||
else
|
||||
} else
|
||||
return null;
|
||||
|
||||
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
|
||||
for ( final Double qual : altQuals ) {
|
||||
for (final Double qual : altQuals) {
|
||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
|
||||
}
|
||||
for ( final Double qual : refQuals ) {
|
||||
for (final Double qual : refQuals) {
|
||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("%s, REF QUALS:",this.getClass().getName());
|
||||
for ( final Double qual : refQuals )
|
||||
System.out.format("%4.1f ",qual);
|
||||
System.out.format("%s, REF QUALS:", this.getClass().getName());
|
||||
for (final Double qual : refQuals)
|
||||
System.out.format("%4.1f ", qual);
|
||||
System.out.println();
|
||||
System.out.format("%s, ALT QUALS:",this.getClass().getName());
|
||||
for ( final Double qual : altQuals )
|
||||
System.out.format("%4.1f ",qual);
|
||||
System.out.format("%s, ALT QUALS:", this.getClass().getName());
|
||||
for (final Double qual : altQuals)
|
||||
System.out.format("%4.1f ", qual);
|
||||
System.out.println();
|
||||
|
||||
}
|
||||
// we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
|
||||
final Pair<Double,Double> testResults = mannWhitneyU.runOneSidedTest( MannWhitneyU.USet.SET1 );
|
||||
final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
|
||||
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
if ( ! Double.isNaN(testResults.first) )
|
||||
if (!Double.isNaN(testResults.first))
|
||||
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
protected abstract void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
|
||||
|
||||
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
|
||||
|
||||
protected static boolean isUsableBase( final PileupElement p ) {
|
||||
return !( p.isDeletion() ||
|
||||
p.getMappingQual() == 0 ||
|
||||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
|
||||
((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here
|
||||
protected static boolean isUsableBase(final PileupElement p) {
|
||||
return !(p.isInsertionAtBeginningOfRead() ||
|
||||
p.isDeletion() ||
|
||||
p.getMappingQual() == 0 ||
|
||||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
|
||||
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here
|
||||
}
|
||||
}
|
||||
|
|
@ -24,27 +24,32 @@ import java.util.List;
|
|||
*/
|
||||
public class ReadPosRankSumTest extends RankSumTest {
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList("ReadPosRankSum");
|
||||
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"));
|
||||
}
|
||||
|
||||
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if( isUsableBase(p) ) {
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p.getOffset(), 0, 0);
|
||||
for (final PileupElement p : pileup) {
|
||||
if (isUsableBase(p)) {
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead());
|
||||
if( readPos > numAlignedBases / 2 ) {
|
||||
readPos = numAlignedBases - ( readPos + 1 );
|
||||
}
|
||||
if (readPos > numAlignedBases / 2)
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
|
||||
|
||||
if (p.getBase() == ref)
|
||||
refQuals.add((double) readPos);
|
||||
else if (p.getBase() == alt)
|
||||
altQuals.add((double) readPos);
|
||||
|
||||
if ( p.getBase() == ref ) {
|
||||
refQuals.add( (double)readPos );
|
||||
} else if ( p.getBase() == alt ) {
|
||||
altQuals.add( (double)readPos );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele
|
||||
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.
|
||||
|
|
@ -52,18 +57,15 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
// To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles.
|
||||
// If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pielup element is "ref"
|
||||
// otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt"
|
||||
final HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p: pileup) {
|
||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p : pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
// by design, first element in LinkedHashMap was ref allele
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read
|
||||
double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele
|
||||
|
||||
for (Allele a : el.keySet()) {
|
||||
|
||||
if (a.isReference())
|
||||
refLikelihood =el.get(a);
|
||||
refLikelihood = el.get(a);
|
||||
else {
|
||||
double like = el.get(a);
|
||||
if (like >= altLikelihood)
|
||||
|
|
@ -75,23 +77,22 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
final int numAlignedBases = getNumAlignedBases(p.getRead());
|
||||
|
||||
int rp = readPos;
|
||||
if( readPos > numAlignedBases / 2 ) {
|
||||
readPos = numAlignedBases - ( readPos + 1 );
|
||||
if (readPos > numAlignedBases / 2) {
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
}
|
||||
//if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases);
|
||||
//if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases);
|
||||
|
||||
|
||||
// if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads
|
||||
// where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping.
|
||||
// if (readPos < -1)
|
||||
// if (readPos < -1)
|
||||
// return;
|
||||
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
refQuals.add((double)readPos);
|
||||
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
refQuals.add((double) readPos);
|
||||
//if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
||||
}
|
||||
else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
altQuals.add((double)readPos);
|
||||
//if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
||||
} else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
altQuals.add((double) readPos);
|
||||
//if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -115,7 +116,7 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
|
||||
// Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
|
||||
// and may leave a string of Q2 bases still hanging off the reads.
|
||||
for (int i=numStartClippedBases; i < unclippedReadBases.length; i++) {
|
||||
for (int i = numStartClippedBases; i < unclippedReadBases.length; i++) {
|
||||
if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD)
|
||||
numStartClippedBases++;
|
||||
else
|
||||
|
|
@ -134,7 +135,7 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
// compute total number of clipped bases (soft or hard clipped)
|
||||
// check for hard clips (never consider these bases):
|
||||
final Cigar c = read.getCigar();
|
||||
CigarElement last = c.getCigarElement(c.numCigarElements()-1);
|
||||
CigarElement last = c.getCigarElement(c.numCigarElements() - 1);
|
||||
|
||||
int numEndClippedBases = 0;
|
||||
if (last.getOperator() == CigarOperator.H) {
|
||||
|
|
@ -145,7 +146,7 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
|
||||
// Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
|
||||
// and may leave a string of Q2 bases still hanging off the reads.
|
||||
for (int i=unclippedReadBases.length-numEndClippedBases-1; i >= 0; i-- ){
|
||||
for (int i = unclippedReadBases.length - numEndClippedBases - 1; i >= 0; i--) {
|
||||
if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD)
|
||||
numEndClippedBases++;
|
||||
else
|
||||
|
|
@ -157,8 +158,6 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
}
|
||||
|
||||
int getOffsetFromClippedReadStart(SAMRecord read, int offset) {
|
||||
|
||||
|
||||
return offset - getNumClippedBasesAtStart(read);
|
||||
return offset - getNumClippedBasesAtStart(read);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -278,7 +278,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
if ( qual == 0 )
|
||||
return 0;
|
||||
|
||||
if ( elt.isReducedRead() ) {
|
||||
if ( elt.getRead().isReducedRead() ) {
|
||||
// reduced read representation
|
||||
if ( BaseUtils.isRegularBase( obsBase )) {
|
||||
int representativeCount = elt.getRepresentativeCount();
|
||||
|
|
|
|||
|
|
@ -60,14 +60,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private final int maxAlternateAlleles;
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>>() {
|
||||
protected synchronized HashMap<PileupElement,LinkedHashMap<Allele,Double>> initialValue() {
|
||||
return new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
|
||||
return new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||
}
|
||||
};
|
||||
|
||||
private LinkedHashMap<Allele,Haplotype> haplotypeMap;
|
||||
private LinkedHashMap<Allele, Haplotype> haplotypeMap;
|
||||
|
||||
// gdebug removeme
|
||||
// todo -cleanup
|
||||
|
|
@ -75,13 +75,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private ArrayList<Allele> alleleList;
|
||||
|
||||
static {
|
||||
indelLikelihoodMap.set(new HashMap<PileupElement,LinkedHashMap<Allele,Double>>());
|
||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
}
|
||||
|
||||
|
||||
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||
alleleList = new ArrayList<Allele>();
|
||||
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
|
|
@ -91,7 +91,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES;
|
||||
doMultiAllelicCalls = UAC.MULTI_ALLELIC;
|
||||
|
||||
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
|
||||
}
|
||||
|
||||
|
|
@ -99,15 +99,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
|
||||
Allele refAllele=null, altAllele=null;
|
||||
Allele refAllele = null, altAllele = null;
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
ArrayList<Allele> aList = new ArrayList<Allele>();
|
||||
|
||||
HashMap<String,Integer> consensusIndelStrings = new HashMap<String,Integer>();
|
||||
HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
|
||||
|
||||
int insCount = 0, delCount = 0;
|
||||
// quick check of total number of indels in pileup
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
||||
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
|
||||
|
|
@ -118,21 +118,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
|
||||
return aList;
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
||||
// todo -- warning, can be duplicating expensive partition here
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
||||
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
|
||||
|
||||
|
||||
|
||||
|
||||
for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) {
|
||||
for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
|
||||
//SAMRecord read = p.getRead();
|
||||
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
|
||||
if (read == null)
|
||||
continue;
|
||||
if(ReadUtils.is454Read(read)) {
|
||||
if (ReadUtils.is454Read(read)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -151,62 +149,57 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// In this case, the read could have any of the inserted bases and we need to build a consensus
|
||||
for (String s : consensusIndelStrings.keySet()) {
|
||||
int cnt = consensusIndelStrings.get(s);
|
||||
if (s.startsWith(indelString)){
|
||||
if (s.startsWith(indelString)) {
|
||||
// case 1: current insertion is prefix of indel in hash map
|
||||
consensusIndelStrings.put(s,cnt+1);
|
||||
consensusIndelStrings.put(s, cnt + 1);
|
||||
foundKey = true;
|
||||
break;
|
||||
}
|
||||
else if (indelString.startsWith(s)) {
|
||||
} else if (indelString.startsWith(s)) {
|
||||
// case 2: indel stored in hash table is prefix of current insertion
|
||||
// In this case, new bases are new key.
|
||||
consensusIndelStrings.remove(s);
|
||||
consensusIndelStrings.put(indelString,cnt+1);
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
foundKey = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!foundKey)
|
||||
// none of the above: event bases not supported by previous table, so add new key
|
||||
consensusIndelStrings.put(indelString,1);
|
||||
consensusIndelStrings.put(indelString, 1);
|
||||
|
||||
}
|
||||
else if (read.getAlignmentStart() == loc.getStart()+1) {
|
||||
} else if (read.getAlignmentStart() == loc.getStart() + 1) {
|
||||
// opposite corner condition: read will start at current locus with an insertion
|
||||
for (String s : consensusIndelStrings.keySet()) {
|
||||
int cnt = consensusIndelStrings.get(s);
|
||||
if (s.endsWith(indelString)){
|
||||
if (s.endsWith(indelString)) {
|
||||
// case 1: current insertion is suffix of indel in hash map
|
||||
consensusIndelStrings.put(s,cnt+1);
|
||||
consensusIndelStrings.put(s, cnt + 1);
|
||||
foundKey = true;
|
||||
break;
|
||||
}
|
||||
else if (indelString.endsWith(s)) {
|
||||
} else if (indelString.endsWith(s)) {
|
||||
// case 2: indel stored in hash table is suffix of current insertion
|
||||
// In this case, new bases are new key.
|
||||
|
||||
consensusIndelStrings.remove(s);
|
||||
consensusIndelStrings.put(indelString,cnt+1);
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
foundKey = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!foundKey)
|
||||
// none of the above: event bases not supported by previous table, so add new key
|
||||
consensusIndelStrings.put(indelString,1);
|
||||
consensusIndelStrings.put(indelString, 1);
|
||||
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// normal case: insertion somewhere in the middle of a read: add count to hash map
|
||||
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
|
||||
consensusIndelStrings.put(indelString,cnt+1);
|
||||
int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
}
|
||||
|
||||
}
|
||||
else if (p.isDeletion()) {
|
||||
indelString = String.format("D%d",p.getEventLength());
|
||||
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
|
||||
consensusIndelStrings.put(indelString,cnt+1);
|
||||
} else if (p.isDeletion()) {
|
||||
indelString = String.format("D%d", p.getEventLength());
|
||||
int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -227,18 +220,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// get deletion length
|
||||
int dLen = Integer.valueOf(s.substring(1));
|
||||
// get ref bases of accurate deletion
|
||||
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
|
||||
int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
|
||||
stop = loc.getStart() + dLen;
|
||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
|
||||
|
||||
if (Allele.acceptableAlleleBases(refBases)) {
|
||||
refAllele = Allele.create(refBases,true);
|
||||
refAllele = Allele.create(refBases, true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// insertion case
|
||||
if (Allele.acceptableAlleleBases(s)) {
|
||||
if (Allele.acceptableAlleleBases(s)) {
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(s, false);
|
||||
stop = loc.getStart();
|
||||
|
|
@ -288,7 +280,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup, GenomeLocParser locParser) {
|
||||
|
||||
if ( tracker == null )
|
||||
if (tracker == null)
|
||||
return null;
|
||||
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
|
|
@ -299,12 +291,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// starting a new site: clear allele list
|
||||
alleleList.clear();
|
||||
lastSiteVisited = ref.getLocus();
|
||||
indelLikelihoodMap.set(new HashMap<PileupElement,LinkedHashMap<Allele,Double>>());
|
||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
haplotypeMap.clear();
|
||||
|
||||
if (getAlleleListFromVCF) {
|
||||
for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
|
||||
if( vc_input != null &&
|
||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) {
|
||||
if (vc_input != null &&
|
||||
allowableTypes.contains(vc_input.getType()) &&
|
||||
ref.getLocus().getStart() == vc_input.getStart()) {
|
||||
vc = vc_input;
|
||||
|
|
@ -312,7 +304,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
}
|
||||
// ignore places where we don't have a variant
|
||||
if ( vc == null )
|
||||
if (vc == null)
|
||||
return null;
|
||||
|
||||
alleleList.clear();
|
||||
|
|
@ -324,15 +316,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
else
|
||||
alleleList.add(a);
|
||||
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
for (Allele a : vc.getAlleles())
|
||||
alleleList.add(a);
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
alleleList = computeConsensusAlleles(ref,contexts, contextType, locParser);
|
||||
} else {
|
||||
alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
|
@ -342,9 +332,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return null;
|
||||
|
||||
// check if there is enough reference window to create haplotypes (can be an issue at end of contigs)
|
||||
if (ref.getWindow().getStop() < loc.getStop()+HAPLOTYPE_SIZE)
|
||||
if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE)
|
||||
return null;
|
||||
if ( !(priors instanceof DiploidIndelGenotypePriors) )
|
||||
if (!(priors instanceof DiploidIndelGenotypePriors))
|
||||
throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model");
|
||||
|
||||
if (alleleList.isEmpty())
|
||||
|
|
@ -355,8 +345,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
// look for alt allele that has biggest length distance to ref allele
|
||||
int maxLenDiff = 0;
|
||||
for (Allele a: alleleList) {
|
||||
if(a.isNonReference()) {
|
||||
for (Allele a : alleleList) {
|
||||
if (a.isNonReference()) {
|
||||
int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
|
||||
if (lenDiff > maxLenDiff) {
|
||||
maxLenDiff = lenDiff;
|
||||
|
|
@ -366,11 +356,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
|
||||
final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
||||
final int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
|
||||
final int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
|
||||
final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1;
|
||||
final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
||||
|
||||
if (hsize <=0) {
|
||||
logger.warn(String.format("Warning: event at location %s can't be genotyped, skipping",loc.toString()));
|
||||
if (hsize <= 0) {
|
||||
logger.warn(String.format("Warning: event at location %s can't be genotyped, skipping", loc.toString()));
|
||||
return null;
|
||||
}
|
||||
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||
|
|
@ -388,7 +378,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// For each sample, get genotype likelihoods based on pileup
|
||||
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
||||
ReadBackedPileup pileup = null;
|
||||
|
|
@ -397,8 +387,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
else if (context.hasBasePileup())
|
||||
pileup = context.getBasePileup();
|
||||
|
||||
if (pileup != null ) {
|
||||
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
if (pileup != null) {
|
||||
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
|
||||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
|
@ -407,9 +397,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
|
||||
for (int k=0; k < genotypeLikelihoods.length; k++)
|
||||
System.out.format("%1.4f ",genotypeLikelihoods[k]);
|
||||
System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
|
||||
for (int k = 0; k < genotypeLikelihoods.length; k++)
|
||||
System.out.format("%1.4f ", genotypeLikelihoods[k]);
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
|
@ -421,21 +411,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
// for indels, stop location is one more than ref allele length
|
||||
boolean hasNullAltAllele = false;
|
||||
for ( Allele a : alleles ) {
|
||||
if ( a.isNull() ) {
|
||||
for (Allele a : alleles) {
|
||||
if (a.isNull()) {
|
||||
hasNullAltAllele = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int endLoc = loc.getStart() + refAllele.length();
|
||||
if( !hasNullAltAllele )
|
||||
if (!hasNullAltAllele)
|
||||
endLoc--;
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
|
||||
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
|
||||
return indelLikelihoodMap.get();
|
||||
}
|
||||
|
||||
|
|
@ -443,8 +433,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// so that per-sample DP will include deletions covering the event.
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
int count = 0;
|
||||
for ( PileupElement p : pileup ) {
|
||||
if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase()) )
|
||||
for (PileupElement p : pileup) {
|
||||
if (p.isDeletion() || p.isInsertionAtBeginningOfRead() || BaseUtils.isRegularBase(p.getBase()))
|
||||
count++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
|
||||
public class BAQedPileupElement extends PileupElement {
|
||||
public BAQedPileupElement( final PileupElement PE ) {
|
||||
super(PE.getRead(), PE.getOffset());
|
||||
super(PE.getRead(), PE.getOffset(), PE.isDeletion());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import java.util.*;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPileup<RBP,PE>,PE extends PileupElement> implements ReadBackedPileup {
|
||||
public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPileup<RBP, PE>, PE extends PileupElement> implements ReadBackedPileup {
|
||||
protected final GenomeLoc loc;
|
||||
protected final PileupElementTracker<PE> pileupElementTracker;
|
||||
|
||||
|
|
@ -55,23 +55,18 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
|
||||
* go changing the reads.
|
||||
*
|
||||
* @param loc
|
||||
* @param loc The genome loc to associate reads wotj
|
||||
* @param reads
|
||||
* @param offsets
|
||||
*/
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets ) {
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets) {
|
||||
this.loc = loc;
|
||||
this.pileupElementTracker = readsOffsets2Pileup(reads,offsets);
|
||||
this.pileupElementTracker = readsOffsets2Pileup(reads, offsets);
|
||||
}
|
||||
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, int offset ) {
|
||||
this.loc = loc;
|
||||
this.pileupElementTracker = readsOffsets2Pileup(reads,offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc without any aligned reads
|
||||
*
|
||||
*/
|
||||
public AbstractReadBackedPileup(GenomeLoc loc) {
|
||||
this(loc, new UnifiedPileupElementTracker<PE>());
|
||||
|
|
@ -81,11 +76,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
|
||||
* pointer to pileup. Don't go changing the data in pileup.
|
||||
*
|
||||
*/
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<PE> pileup) {
|
||||
if ( loc == null ) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup");
|
||||
if ( pileup == null ) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup");
|
||||
if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup");
|
||||
if (pileup == null) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileupElementTracker = new UnifiedPileupElementTracker<PE>(pileup);
|
||||
|
|
@ -94,12 +88,13 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
*
|
||||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<PE> pileup, int size, int nDeletions, int nMQ0Reads) {
|
||||
if ( loc == null ) throw new ReviewedStingException("Illegal null genomeloc in UnifiedReadBackedPileup");
|
||||
if ( pileup == null ) throw new ReviewedStingException("Illegal null pileup in UnifiedReadBackedPileup");
|
||||
if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in UnifiedReadBackedPileup");
|
||||
if (pileup == null) throw new ReviewedStingException("Illegal null pileup in UnifiedReadBackedPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileupElementTracker = new UnifiedPileupElementTracker<PE>(pileup);
|
||||
|
|
@ -115,16 +110,21 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
calculateCachedData();
|
||||
}
|
||||
|
||||
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
|
||||
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String, ? extends AbstractReadBackedPileup<RBP, PE>> pileupsBySample) {
|
||||
this.loc = loc;
|
||||
PerSamplePileupElementTracker<PE> tracker = new PerSamplePileupElementTracker<PE>();
|
||||
for(Map.Entry<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
|
||||
tracker.addElements(pileupEntry.getKey(),pileupEntry.getValue().pileupElementTracker);
|
||||
for (Map.Entry<String, ? extends AbstractReadBackedPileup<RBP, PE>> pileupEntry : pileupsBySample.entrySet()) {
|
||||
tracker.addElements(pileupEntry.getKey(), pileupEntry.getValue().pileupElementTracker);
|
||||
addPileupToCumulativeStats(pileupEntry.getValue());
|
||||
}
|
||||
this.pileupElementTracker = tracker;
|
||||
}
|
||||
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, int offset) {
|
||||
this.loc = loc;
|
||||
this.pileupElementTracker = readsOffsets2Pileup(reads, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||
|
|
@ -135,12 +135,12 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
nDeletions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
for ( PileupElement p : pileupElementTracker ) {
|
||||
for (PileupElement p : pileupElementTracker) {
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
if (p.isDeletion()) {
|
||||
nDeletions++;
|
||||
}
|
||||
if ( p.getRead().getMappingQuality() == 0 ) {
|
||||
if (p.getRead().getMappingQuality() == 0) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
|
|
@ -148,12 +148,12 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
protected void calculateAbstractSize() {
|
||||
abstractSize = 0;
|
||||
for ( PileupElement p : pileupElementTracker ) {
|
||||
for (PileupElement p : pileupElementTracker) {
|
||||
abstractSize += p.getRepresentativeCount();
|
||||
}
|
||||
}
|
||||
|
||||
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP,PE> pileup) {
|
||||
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP, PE> pileup) {
|
||||
size += pileup.getNumberOfElements();
|
||||
abstractSize += pileup.depthOfCoverage();
|
||||
nDeletions += pileup.getNumberOfDeletions();
|
||||
|
|
@ -167,14 +167,17 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* @param offsets
|
||||
* @return
|
||||
*/
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, List<Integer> offsets ) {
|
||||
if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if ( offsets == null ) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup");
|
||||
if ( reads.size() != offsets.size() ) throw new ReviewedStingException("Reads and offset lists have different sizes!");
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, List<Integer> offsets) {
|
||||
if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if (offsets == null) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup");
|
||||
if (reads.size() != offsets.size())
|
||||
throw new ReviewedStingException("Reads and offset lists have different sizes!");
|
||||
|
||||
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
pileup.add(createNewPileupElement(reads.get(i),offsets.get(i)));
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
GATKSAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
|
||||
}
|
||||
|
||||
return pileup;
|
||||
|
|
@ -187,20 +190,21 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, int offset ) {
|
||||
if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if ( offset < 0 ) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup");
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, int offset) {
|
||||
if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if (offset < 0) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup");
|
||||
|
||||
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
pileup.add(createNewPileupElement( reads.get(i), offset ));
|
||||
for (GATKSAMRecord read : reads) {
|
||||
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
|
||||
}
|
||||
|
||||
return pileup;
|
||||
}
|
||||
|
||||
protected abstract AbstractReadBackedPileup<RBP,PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
|
||||
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset);
|
||||
protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
|
||||
|
||||
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion);
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
|
|
@ -217,32 +221,31 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public RBP getPileupWithoutDeletions() {
|
||||
if ( getNumberOfDeletions() > 0 ) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (getNumberOfDeletions() > 0) {
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutDeletions();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getPileupWithoutDeletions();
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
|
||||
}
|
||||
else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
for ( PE p : tracker ) {
|
||||
if ( !p.isDeletion() ) {
|
||||
for (PE p : tracker) {
|
||||
if (!p.isDeletion()) {
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return (RBP)createNewPileup(loc, filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
}
|
||||
} else {
|
||||
return (RBP)this;
|
||||
return (RBP) this;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -256,21 +259,20 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public RBP getOverlappingFragmentFilteredPileup() {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup();
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
}
|
||||
else {
|
||||
Map<String,PE> filteredPileup = new HashMap<String, PE>();
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
} else {
|
||||
Map<String, PE> filteredPileup = new HashMap<String, PE>();
|
||||
|
||||
for ( PE p : pileupElementTracker ) {
|
||||
for (PE p : pileupElementTracker) {
|
||||
String readName = p.getRead().getReadName();
|
||||
|
||||
// if we've never seen this read before, life is good
|
||||
|
|
@ -292,10 +294,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE filteredElement: filteredPileup.values())
|
||||
for (PE filteredElement : filteredPileup.values())
|
||||
filteredTracker.add(filteredElement);
|
||||
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -309,300 +311,299 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public RBP getPileupWithoutMappingQualityZeroReads() {
|
||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (getNumberOfMappingQualityZeroReads() > 0) {
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getPileupWithoutMappingQualityZeroReads();
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
|
||||
}
|
||||
else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
for ( PE p : tracker ) {
|
||||
if ( p.getRead().getMappingQuality() > 0 ) {
|
||||
for (PE p : tracker) {
|
||||
if (p.getRead().getMappingQuality() > 0) {
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return (RBP)createNewPileup(loc, filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
}
|
||||
} else {
|
||||
return (RBP)this;
|
||||
return (RBP) this;
|
||||
}
|
||||
}
|
||||
|
||||
public RBP getPositiveStrandPileup() {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPositiveStrandPileup();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getPositiveStrandPileup();
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
}
|
||||
else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
for ( PE p : tracker ) {
|
||||
if ( !p.getRead().getReadNegativeStrandFlag() ) {
|
||||
for (PE p : tracker) {
|
||||
if (!p.getRead().getReadNegativeStrandFlag()) {
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return (RBP)createNewPileup(loc, filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the pileup consisting of only reads on the negative strand.
|
||||
*
|
||||
* @return A read-backed pileup consisting only of reads on the negative strand.
|
||||
*/
|
||||
public RBP getNegativeStrandPileup() {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getNegativeStrandPileup();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getNegativeStrandPileup();
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
}
|
||||
else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
for ( PE p : tracker ) {
|
||||
if ( p.getRead().getReadNegativeStrandFlag() ) {
|
||||
for (PE p : tracker) {
|
||||
if (p.getRead().getReadNegativeStrandFlag()) {
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return (RBP)createNewPileup(loc, filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a pileup consisting of all those elements passed by a given filter.
|
||||
*
|
||||
* @param filter Filter to use when testing for elements.
|
||||
* @return a pileup without the given filtered elements.
|
||||
*/
|
||||
public RBP getFilteredPileup(PileupElementFilter filter) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter);
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getFilteredPileup(filter);
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
}
|
||||
else {
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
for ( PE p : pileupElementTracker ) {
|
||||
if( filter.allow(p) )
|
||||
for (PE p : pileupElementTracker) {
|
||||
if (filter.allow(p))
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
|
||||
return (RBP)createNewPileup(loc, filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||
/**
|
||||
* Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
||||
*
|
||||
* @param minBaseQ
|
||||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public RBP getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
public RBP getBaseAndMappingFilteredPileup(int minBaseQ, int minMapQ) {
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ);
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ, minMapQ);
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
}
|
||||
else {
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
for ( PE p : pileupElementTracker ) {
|
||||
if ( p.getRead().getMappingQuality() >= minMapQ &&
|
||||
for (PE p : pileupElementTracker) {
|
||||
if (p.getRead().getMappingQuality() >= minMapQ &&
|
||||
(p.isDeletion() ||
|
||||
((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement)p).getType() == ExtendedEventPileupElement.Type.NOEVENT) ||
|
||||
p.getQual() >= minBaseQ) ) {
|
||||
((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement) p).getType() == ExtendedEventPileupElement.Type.NOEVENT) ||
|
||||
p.getQual() >= minBaseQ)) {
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
|
||||
return (RBP)createNewPileup(loc, filteredTracker);
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
|
||||
/**
|
||||
* Returns subset of this pileup that contains only bases with quality >= minBaseQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
*
|
||||
* @param minBaseQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public RBP getBaseFilteredPileup( int minBaseQ ) {
|
||||
public RBP getBaseFilteredPileup(int minBaseQ) {
|
||||
return getBaseAndMappingFilteredPileup(minBaseQ, -1);
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
|
||||
/**
|
||||
* Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
*
|
||||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public RBP getMappingFilteredPileup( int minMapQ ) {
|
||||
public RBP getMappingFilteredPileup(int minMapQ) {
|
||||
return getBaseAndMappingFilteredPileup(-1, minMapQ);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of the read groups represented in this pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Collection<String> getReadGroups() {
|
||||
Set<String> readGroups = new HashSet<String>();
|
||||
for(PileupElement pileupElement: this)
|
||||
for (PileupElement pileupElement : this)
|
||||
readGroups.add(pileupElement.getRead().getReadGroup().getReadGroupId());
|
||||
return readGroups;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the pileup for a given read group. Horrendously inefficient at this point.
|
||||
*
|
||||
* @param targetReadGroupId Identifier for the read group.
|
||||
* @return A read-backed pileup containing only the reads in the given read group.
|
||||
*/
|
||||
@Override
|
||||
public RBP getPileupForReadGroup(String targetReadGroupId) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroup(targetReadGroupId);
|
||||
if(pileup != null)
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroup(targetReadGroupId);
|
||||
if (pileup != null)
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
}
|
||||
else {
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
for (PE p : pileupElementTracker) {
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(targetReadGroupId != null) {
|
||||
if(read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId()))
|
||||
if (targetReadGroupId != null) {
|
||||
if (read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId()))
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
|
||||
} else {
|
||||
if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the pileup for a set of read groups. Horrendously inefficient at this point.
|
||||
*
|
||||
* @param rgSet List of identifiers for the read groups.
|
||||
* @return A read-backed pileup containing only the reads in the given read groups.
|
||||
*/
|
||||
@Override
|
||||
public RBP getPileupForReadGroups(final HashSet<String> rgSet) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroups(rgSet);
|
||||
if(pileup != null)
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroups(rgSet);
|
||||
if (pileup != null)
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
}
|
||||
else {
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
for (PE p : pileupElementTracker) {
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(rgSet != null && !rgSet.isEmpty()) {
|
||||
if(read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId()))
|
||||
if (rgSet != null && !rgSet.isEmpty()) {
|
||||
if (read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId()))
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
|
||||
} else {
|
||||
if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public RBP getPileupForLane(String laneID) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForLane(laneID);
|
||||
if(pileup != null)
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getPileupForLane(laneID);
|
||||
if (pileup != null)
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
}
|
||||
else {
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
for (PE p : pileupElementTracker) {
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(laneID != null) {
|
||||
if(read.getReadGroup() != null &&
|
||||
(read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different
|
||||
(read.getReadGroup().getReadGroupId().equals(laneID))) // in case there is no sample identifier, they have to be exactly the same
|
||||
if (laneID != null) {
|
||||
if (read.getReadGroup() != null &&
|
||||
(read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different
|
||||
(read.getReadGroup().getReadGroupId().equals(laneID))) // in case there is no sample identifier, they have to be exactly the same
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
|
||||
} else {
|
||||
if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<String> getSamples() {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
return new HashSet<String>(tracker.getSamples());
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
Collection<String> sampleNames = new HashSet<String>();
|
||||
for(PileupElement p: this) {
|
||||
for (PileupElement p : this) {
|
||||
GATKSAMRecord read = p.getRead();
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
sampleNames.add(sampleName);
|
||||
|
|
@ -619,103 +620,98 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public RBP getDownsampledPileup(int desiredCoverage) {
|
||||
if ( getNumberOfElements() <= desiredCoverage )
|
||||
return (RBP)this;
|
||||
if (getNumberOfElements() <= desiredCoverage)
|
||||
return (RBP) this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(size)) )
|
||||
for (int i = 0; i < desiredCoverage; /* no update */) {
|
||||
if (positions.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(size)))
|
||||
i++;
|
||||
}
|
||||
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
int current = 0;
|
||||
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
|
||||
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
for(PileupElement p: perSampleElements) {
|
||||
if(positions.contains(current))
|
||||
for (PileupElement p : perSampleElements) {
|
||||
if (positions.contains(current))
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
|
||||
if(!filteredPileup.isEmpty()) {
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements);
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
if (!filteredPileup.isEmpty()) {
|
||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements);
|
||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
||||
}
|
||||
|
||||
current++;
|
||||
}
|
||||
|
||||
return (RBP)createNewPileup(loc,filteredTracker);
|
||||
}
|
||||
else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
|
||||
return (RBP) createNewPileup(loc, filteredTracker);
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
|
||||
Iterator positionIter = positions.iterator();
|
||||
|
||||
while ( positionIter.hasNext() ) {
|
||||
int nextReadToKeep = (Integer)positionIter.next();
|
||||
while (positionIter.hasNext()) {
|
||||
int nextReadToKeep = (Integer) positionIter.next();
|
||||
filteredTracker.add(tracker.get(nextReadToKeep));
|
||||
}
|
||||
|
||||
return (RBP)createNewPileup(getLocation(), filteredTracker);
|
||||
return (RBP) createNewPileup(getLocation(), filteredTracker);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public RBP getPileupForSamples(Collection<String> sampleNames) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleNames);
|
||||
return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null;
|
||||
}
|
||||
else {
|
||||
return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null;
|
||||
} else {
|
||||
HashSet<String> hashSampleNames = new HashSet<String>(sampleNames); // to speed up the "contains" access in the for loop
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
for (PE p : pileupElementTracker) {
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else.
|
||||
if(read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample()))
|
||||
if (sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else.
|
||||
if (read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample()))
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
|
||||
} else {
|
||||
if (read.getReadGroup() == null || read.getReadGroup().getSample() == null)
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public RBP getPileupForSample(String sampleName) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleName);
|
||||
return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null;
|
||||
}
|
||||
else {
|
||||
return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null;
|
||||
} else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
for (PE p : pileupElementTracker) {
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(sampleName != null) {
|
||||
if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
|
||||
if (sampleName != null) {
|
||||
if (read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
|
||||
} else {
|
||||
if (read.getReadGroup() == null || read.getReadGroup().getSample() == null)
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -727,9 +723,9 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
|
||||
*
|
||||
* <p/>
|
||||
* for (PileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* <p/>
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
|
|
@ -739,9 +735,17 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
return new Iterator<PileupElement>() {
|
||||
private final Iterator<PE> wrappedIterator = pileupElementTracker.iterator();
|
||||
|
||||
public boolean hasNext() { return wrappedIterator.hasNext(); }
|
||||
public PileupElement next() { return wrappedIterator.next(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from a pileup element iterator"); }
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public PileupElement next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Cannot remove from a pileup element iterator");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -784,7 +788,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public int depthOfCoverage() {
|
||||
if ( abstractSize == -1 )
|
||||
if (abstractSize == -1)
|
||||
calculateAbstractSize();
|
||||
return abstractSize;
|
||||
}
|
||||
|
|
@ -794,7 +798,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return size==0;
|
||||
return size == 0;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -816,19 +820,18 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
public int[] getBaseCounts() {
|
||||
int[] counts = new int[4];
|
||||
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
int[] countsBySample = createNewPileup(loc,tracker.getElements(sample)).getBaseCounts();
|
||||
for(int i = 0; i < counts.length; i++)
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
for (final String sample : tracker.getSamples()) {
|
||||
int[] countsBySample = createNewPileup(loc, tracker.getElements(sample)).getBaseCounts();
|
||||
for (int i = 0; i < counts.length; i++)
|
||||
counts[i] += countsBySample[i];
|
||||
}
|
||||
}
|
||||
else {
|
||||
for ( PileupElement pile : this ) {
|
||||
} else {
|
||||
for (PileupElement pile : this) {
|
||||
// skip deletion sites
|
||||
if ( ! pile.isDeletion() ) {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||
if (!pile.isDeletion()) {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char) pile.getBase());
|
||||
if (index != -1)
|
||||
counts[index]++;
|
||||
}
|
||||
|
|
@ -857,65 +860,80 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<GATKSAMRecord> getReads() {
|
||||
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(getNumberOfElements());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
for (PileupElement pile : this) {
|
||||
reads.add(pile.getRead());
|
||||
}
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(getNumberOfElements());
|
||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
for (PileupElement pile : this) {
|
||||
offsets.add(pile.getOffset());
|
||||
}
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getBases() {
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int pos = 0;
|
||||
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getBase(); }
|
||||
for (PileupElement pile : pileupElementTracker) {
|
||||
v[pos++] = pile.getBase();
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getQuals() {
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int pos = 0;
|
||||
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getQual(); }
|
||||
for (PileupElement pile : pileupElementTracker) {
|
||||
v[pos++] = pile.getQual();
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int pos = 0;
|
||||
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = (byte)pile.getRead().getMappingQuality(); }
|
||||
for (PileupElement pile : pileupElementTracker) {
|
||||
v[pos++] = (byte) pile.getRead().getMappingQuality();
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static String quals2String( byte[] quals ) {
|
||||
static String quals2String(byte[] quals) {
|
||||
StringBuilder qualStr = new StringBuilder();
|
||||
for ( int qual : quals ) {
|
||||
for (int qual : quals) {
|
||||
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
|
||||
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
|
||||
qualStr.append(qualChar);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import java.util.Arrays;
|
|||
* are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted
|
||||
* in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept.
|
||||
* The insertions that may be present in some reads are not seen at all in such strict reference traversal mode.
|
||||
*
|
||||
* <p/>
|
||||
* By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e.
|
||||
* last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is
|
||||
* an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice:
|
||||
|
|
@ -22,9 +22,9 @@ import java.util.Arrays;
|
|||
* (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at
|
||||
* Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed
|
||||
* only once, at the position where the indel maps (starts).
|
||||
*
|
||||
* <p/>
|
||||
* This class wraps an "extended" event (indel) so that in can be added to a pileup of events at a given location.
|
||||
*
|
||||
* <p/>
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Dec 21, 2009
|
||||
|
|
@ -39,40 +39,52 @@ public class ExtendedEventPileupElement extends PileupElement {
|
|||
private Type type = null;
|
||||
private int eventLength = -1;
|
||||
private String eventBases = null; // if it is a deletion, we do not have information about the actual deleted bases
|
||||
// in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
|
||||
// in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
|
||||
private SAMRecord read;
|
||||
private int offset; // position in the read immediately BEFORE the event
|
||||
// This is broken! offset is always zero because these member variables are shadowed by base class
|
||||
|
||||
/** Constructor for extended pileup element (indel).
|
||||
*
|
||||
* @param read the read, in which the indel is observed
|
||||
* @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion)
|
||||
* @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT)
|
||||
* @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent)
|
||||
*/
|
||||
public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length, byte[] eventBases ) {
|
||||
super(read, offset);
|
||||
this.eventLength = length;
|
||||
if ( length <= 0 ) type = Type.NOEVENT;
|
||||
else {
|
||||
if ( eventBases != null ) {
|
||||
this.eventBases = new String(eventBases).toUpperCase();
|
||||
type = Type.INSERTION;
|
||||
} else {
|
||||
type = Type.DELETION;
|
||||
}
|
||||
}
|
||||
|
||||
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) {
|
||||
super(read, offset, type == Type.DELETION);
|
||||
this.read = read;
|
||||
this.offset = offset;
|
||||
this.eventLength = eventLength;
|
||||
this.eventBases = eventBases;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
/** Constructor for deletion or noevent calls - does not take event bases as an argument (as those should
|
||||
* be null or are ignored in these cases anyway)
|
||||
* @param read
|
||||
* @param offset
|
||||
* @param length
|
||||
/**
|
||||
* Quick constructor for insertions.
|
||||
*
|
||||
* @param read the read, in which the indel is observed
|
||||
* @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion)
|
||||
* @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT)
|
||||
* @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent)
|
||||
*/
|
||||
public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length ) {
|
||||
this(read,offset, length, null);
|
||||
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int length, byte[] eventBases) {
|
||||
this(read, offset, length, new String(eventBases).toUpperCase(), Type.INSERTION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick constructor for non indels (matches)
|
||||
*
|
||||
* @param read the read
|
||||
* @param offset where in the read the match is
|
||||
*/
|
||||
public ExtendedEventPileupElement(GATKSAMRecord read, int offset) {
|
||||
this(read, offset, -1, null, Type.NOEVENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick constructor for deletions
|
||||
*
|
||||
* @param read the read
|
||||
* @param offset the last base before the deletion starts (left aligned deletion)
|
||||
* @param length length of this deletion
|
||||
*/
|
||||
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int length) {
|
||||
this(read, offset, length, null, Type.DELETION);
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
|
|
@ -87,46 +99,54 @@ public class ExtendedEventPileupElement extends PileupElement {
|
|||
return isDeletion() || isInsertion();
|
||||
}
|
||||
|
||||
public Type getType() { return type; }
|
||||
public Type getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
|
||||
// a valid base quality. The following code attempts to compensate for that.'
|
||||
|
||||
@Override
|
||||
public byte getBase() {
|
||||
return getBase(offset >= 0 ? offset : offset+eventLength);
|
||||
return getBase(offset >= 0 ? offset : offset + eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBaseIndex() {
|
||||
return getBaseIndex(offset >= 0 ? offset : offset+eventLength);
|
||||
return getBaseIndex(offset >= 0 ? offset : offset + eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getQual() {
|
||||
return getQual(offset >= 0 ? offset : offset+eventLength);
|
||||
return getQual(offset >= 0 ? offset : offset + eventLength);
|
||||
}
|
||||
|
||||
/** Returns length of the event (number of inserted or deleted bases */
|
||||
public int getEventLength() { return eventLength; }
|
||||
/**
|
||||
* Returns length of the event (number of inserted or deleted bases
|
||||
*/
|
||||
public int getEventLength() {
|
||||
return eventLength;
|
||||
}
|
||||
|
||||
/** Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
|
||||
* */
|
||||
public String getEventBases() { return eventBases; }
|
||||
/**
|
||||
* Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
|
||||
*/
|
||||
public String getEventBases() {
|
||||
return eventBases;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
char c = '.';
|
||||
String fillStr = null ;
|
||||
if ( isDeletion() ) {
|
||||
String fillStr = null;
|
||||
if (isDeletion()) {
|
||||
c = '-';
|
||||
char [] filler = new char[eventLength];
|
||||
char[] filler = new char[eventLength];
|
||||
Arrays.fill(filler, 'D');
|
||||
fillStr = new String(filler);
|
||||
}
|
||||
else if ( isInsertion() ) c = '+';
|
||||
return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()?
|
||||
(isInsertion() ? eventBases : fillStr ): "", getMappingQual());
|
||||
} else if (isInsertion()) c = '+';
|
||||
return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel() ?
|
||||
(isInsertion() ? eventBases : fillStr) : "", getMappingQual());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.pileup;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
|
|
@ -21,25 +22,61 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
|
||||
protected final GATKSAMRecord read;
|
||||
protected final int offset;
|
||||
protected final boolean isDeletion;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new pileup element.
|
||||
*
|
||||
* @param read the read we are adding to the pileup
|
||||
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
||||
* @param isDeletion whether or not this base is a deletion
|
||||
*/
|
||||
@Requires({
|
||||
"read != null",
|
||||
"offset >= -1",
|
||||
"offset <= read.getReadLength()"})
|
||||
public PileupElement( GATKSAMRecord read, int offset ) {
|
||||
public PileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
||||
if (offset < 0 && isDeletion)
|
||||
throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset");
|
||||
|
||||
this.read = read;
|
||||
this.offset = offset;
|
||||
this.isDeletion = isDeletion;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Creates a NON DELETION pileup element.
|
||||
// *
|
||||
// * use this constructor only for insertions and matches/mismatches.
|
||||
// * @param read the read we are adding to the pileup
|
||||
// * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
||||
// */
|
||||
// @Requires({
|
||||
// "read != null",
|
||||
// "offset >= -1",
|
||||
// "offset <= read.getReadLength()"})
|
||||
// public PileupElement( GATKSAMRecord read, int offset ) {
|
||||
// this(read, offset, false);
|
||||
// }
|
||||
//
|
||||
public boolean isDeletion() {
|
||||
return isDeletion;
|
||||
}
|
||||
|
||||
public boolean isInsertionAtBeginningOfRead() {
|
||||
return offset == -1;
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public GATKSAMRecord getRead() { return read; }
|
||||
public GATKSAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
@Ensures("result == offset")
|
||||
public int getOffset() { return offset; }
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
public byte getBase() {
|
||||
return getBase(offset);
|
||||
|
|
@ -59,30 +96,30 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
|
||||
@Ensures("result != null")
|
||||
public String toString() {
|
||||
return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual());
|
||||
return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char) getBase(), getQual());
|
||||
}
|
||||
|
||||
protected byte getBase(final int offset) {
|
||||
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
|
||||
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset];
|
||||
}
|
||||
|
||||
protected int getBaseIndex(final int offset) {
|
||||
return BaseUtils.simpleBaseToBaseIndex(isDeletion() ? DELETION_BASE : read.getReadBases()[offset]);
|
||||
return BaseUtils.simpleBaseToBaseIndex((isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset]);
|
||||
}
|
||||
|
||||
protected byte getQual(final int offset) {
|
||||
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
|
||||
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseQualities()[offset];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final PileupElement pileupElement) {
|
||||
if ( offset < pileupElement.offset )
|
||||
if (offset < pileupElement.offset)
|
||||
return -1;
|
||||
else if ( offset > pileupElement.offset )
|
||||
else if (offset > pileupElement.offset)
|
||||
return 1;
|
||||
else if ( read.getAlignmentStart() < pileupElement.read.getAlignmentStart() )
|
||||
else if (read.getAlignmentStart() < pileupElement.read.getAlignmentStart())
|
||||
return -1;
|
||||
else if ( read.getAlignmentStart() > pileupElement.read.getAlignmentStart() )
|
||||
else if (read.getAlignmentStart() > pileupElement.read.getAlignmentStart())
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
|
|
@ -94,13 +131,26 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
//
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
public boolean isReducedRead() {
|
||||
return read.isReducedRead();
|
||||
}
|
||||
// public boolean isReducedRead() {
|
||||
// return read.isReducedRead();
|
||||
// }
|
||||
|
||||
/**
|
||||
* Returns the number of elements in the pileup element.
|
||||
* <p/>
|
||||
* Unless this is a reduced read, the number of elements in a pileup element is one. In the event of
|
||||
* this being a reduced read and a deletion, we return the average number of elements between the left
|
||||
* and right elements to the deletion. We assume the deletion to be left aligned.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getRepresentativeCount() {
|
||||
// TODO -- if we ever decide to reduce the representation of deletions then this will need to be fixed
|
||||
return (!isDeletion() && isReducedRead()) ? read.getReducedCount(offset) : 1;
|
||||
int representativeCount = 1;
|
||||
|
||||
if (read.isReducedRead() && !isInsertionAtBeginningOfRead())
|
||||
representativeCount = (isDeletion()) ? Math.round((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2) : read.getReducedCount(offset);
|
||||
|
||||
return representativeCount;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -30,33 +30,34 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<ReadBackedExtendedEventPileupImpl,ExtendedEventPileupElement> implements ReadBackedExtendedEventPileup {
|
||||
public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<ReadBackedExtendedEventPileupImpl, ExtendedEventPileupElement> implements ReadBackedExtendedEventPileup {
|
||||
private int nInsertions;
|
||||
private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site
|
||||
|
||||
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileupElements) {
|
||||
super(loc,pileupElements);
|
||||
super(loc, pileupElements);
|
||||
}
|
||||
|
||||
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, PileupElementTracker<ExtendedEventPileupElement> tracker) {
|
||||
super(loc,tracker);
|
||||
super(loc, tracker);
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
*
|
||||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileup, int size,
|
||||
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads) {
|
||||
super(loc,pileup,size,nDeletions,nMQ0Reads);
|
||||
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads) {
|
||||
super(loc, pileup, size, nDeletions, nMQ0Reads);
|
||||
this.maxDeletionLength = maxDeletionLength;
|
||||
this.nInsertions = nInsertions;
|
||||
}
|
||||
|
||||
// this is the good new one
|
||||
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
|
||||
super(loc,pileupElementsBySample);
|
||||
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String, ? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
|
||||
super(loc, pileupElementsBySample);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -71,31 +72,31 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
nInsertions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
for ( ExtendedEventPileupElement p : this.toExtendedIterable() ) {
|
||||
for (ExtendedEventPileupElement p : this.toExtendedIterable()) {
|
||||
|
||||
if ( p.isDeletion() ) {
|
||||
if (p.isDeletion()) {
|
||||
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
|
||||
} else {
|
||||
if ( p.isInsertion() ) nInsertions++;
|
||||
if (p.isInsertion()) nInsertions++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<ReadBackedExtendedEventPileupImpl,ExtendedEventPileupElement> pileup) {
|
||||
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<ReadBackedExtendedEventPileupImpl, ExtendedEventPileupElement> pileup) {
|
||||
super.addPileupToCumulativeStats(pileup);
|
||||
ReadBackedExtendedEventPileup extendedEventPileup = ((ReadBackedExtendedEventPileup)pileup);
|
||||
ReadBackedExtendedEventPileup extendedEventPileup = ((ReadBackedExtendedEventPileup) pileup);
|
||||
this.nInsertions += extendedEventPileup.getNumberOfInsertions();
|
||||
this.maxDeletionLength += extendedEventPileup.getMaxDeletionLength();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ReadBackedExtendedEventPileupImpl createNewPileup(GenomeLoc loc, PileupElementTracker<ExtendedEventPileupElement> tracker) {
|
||||
return new ReadBackedExtendedEventPileupImpl(loc,tracker);
|
||||
return new ReadBackedExtendedEventPileupImpl(loc, tracker);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
|
||||
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
||||
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
|
||||
}
|
||||
|
||||
|
|
@ -110,10 +111,12 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
return nInsertions;
|
||||
}
|
||||
|
||||
/** Returns the length of the longest deletion observed at the site this
|
||||
/**
|
||||
* Returns the length of the longest deletion observed at the site this
|
||||
* pileup is associated with (NOTE: by convention, both insertions and deletions
|
||||
* are associated with genomic location immediately before the actual event). If
|
||||
* there are no deletions at the site, returns 0.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
|
|
@ -123,36 +126,47 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
|
||||
public Iterable<ExtendedEventPileupElement> toExtendedIterable() {
|
||||
return new Iterable<ExtendedEventPileupElement>() {
|
||||
public Iterator<ExtendedEventPileupElement> iterator() { return pileupElementTracker.iterator(); }
|
||||
public Iterator<ExtendedEventPileupElement> iterator() {
|
||||
return pileupElementTracker.iterator();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getEvents() {
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) {
|
||||
switch ( e.getType() ) {
|
||||
case INSERTION: v[i] = 'I'; break;
|
||||
case DELETION: v[i] = 'D'; break;
|
||||
case NOEVENT: v[i] = '.'; break;
|
||||
default: throw new ReviewedStingException("Unknown event type encountered: "+e.getType());
|
||||
for (ExtendedEventPileupElement e : this.toExtendedIterable()) {
|
||||
switch (e.getType()) {
|
||||
case INSERTION:
|
||||
v[i] = 'I';
|
||||
break;
|
||||
case DELETION:
|
||||
v[i] = 'D';
|
||||
break;
|
||||
case NOEVENT:
|
||||
v[i] = '.';
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unknown event type encountered: " + e.getType());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
||||
/** A shortcut for getEventStringsWithCounts(null);
|
||||
/**
|
||||
* A shortcut for getEventStringsWithCounts(null);
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts() {
|
||||
public List<Pair<String, Integer>> getEventStringsWithCounts() {
|
||||
return getEventStringsWithCounts(null);
|
||||
}
|
||||
|
||||
|
|
@ -163,44 +177,48 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
// insertion, deletion or no-event, respectively.
|
||||
return String.format("%s %s E %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
new String(getEvents()) );
|
||||
new String(getEvents()));
|
||||
}
|
||||
|
||||
/** Returns String representation of all distinct extended events (indels) at the site along with
|
||||
/**
|
||||
* Returns String representation of all distinct extended events (indels) at the site along with
|
||||
* observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
|
||||
* deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
|
||||
* deleted sequence will be used in the string representation (e.g. "-AAC").
|
||||
* @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
|
||||
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
|
||||
*
|
||||
* @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
|
||||
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
|
||||
* @return list of distinct events; first element of a pair is a string representation of the event, second element
|
||||
* gives the number of reads, in which that event was observed
|
||||
* gives the number of reads, in which that event was observed
|
||||
*/
|
||||
@Override
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
|
||||
Map<String, Integer> events = new HashMap<String,Integer>();
|
||||
public List<Pair<String, Integer>> getEventStringsWithCounts(byte[] refBases) {
|
||||
Map<String, Integer> events = new HashMap<String, Integer>();
|
||||
|
||||
for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) {
|
||||
for (ExtendedEventPileupElement e : this.toExtendedIterable()) {
|
||||
Integer cnt;
|
||||
String indel = null;
|
||||
switch ( e.getType() ) {
|
||||
switch (e.getType()) {
|
||||
case INSERTION:
|
||||
indel = "+"+e.getEventBases();
|
||||
indel = "+" + e.getEventBases();
|
||||
break;
|
||||
case DELETION:
|
||||
indel = getDeletionString(e.getEventLength(),refBases);
|
||||
indel = getDeletionString(e.getEventLength(), refBases);
|
||||
break;
|
||||
case NOEVENT: continue;
|
||||
default: throw new ReviewedStingException("Unknown event type encountered: "+e.getType());
|
||||
case NOEVENT:
|
||||
continue;
|
||||
default:
|
||||
throw new ReviewedStingException("Unknown event type encountered: " + e.getType());
|
||||
}
|
||||
|
||||
cnt = events.get(indel);
|
||||
if ( cnt == null ) events.put(indel,1);
|
||||
else events.put(indel,cnt.intValue()+1);
|
||||
if (cnt == null) events.put(indel, 1);
|
||||
else events.put(indel, cnt.intValue() + 1);
|
||||
}
|
||||
|
||||
List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(events.size());
|
||||
for ( Map.Entry<String,Integer> m : events.entrySet() ) {
|
||||
eventList.add( new Pair<String,Integer>(m.getKey(),m.getValue()));
|
||||
List<Pair<String, Integer>> eventList = new ArrayList<Pair<String, Integer>>(events.size());
|
||||
for (Map.Entry<String, Integer> m : events.entrySet()) {
|
||||
eventList.add(new Pair<String, Integer>(m.getKey(), m.getValue()));
|
||||
}
|
||||
return eventList;
|
||||
}
|
||||
|
|
@ -208,18 +226,19 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
/**
|
||||
* Builds string representation of the deletion event. If refBases is null, the representation will be
|
||||
* "<length>D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC")
|
||||
* will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted
|
||||
* will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted
|
||||
* base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the
|
||||
* deletion length (i.e. size of refBase must be at least length+1).
|
||||
*
|
||||
* @param length
|
||||
* @param refBases
|
||||
* @return
|
||||
*/
|
||||
private String getDeletionString(int length, byte[] refBases) {
|
||||
if ( refBases == null ) {
|
||||
return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D"
|
||||
if (refBases == null) {
|
||||
return Integer.toString(length) + "D"; // if we do not have reference bases, we can only report something like "5D"
|
||||
} else {
|
||||
return "-"+new String(refBases,1,length).toUpperCase();
|
||||
return "-" + new String(refBases, 1, length).toUpperCase();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -29,48 +29,49 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPileupImpl,PileupElement> implements ReadBackedPileup {
|
||||
public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPileupImpl, PileupElement> implements ReadBackedPileup {
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc) {
|
||||
super(loc);
|
||||
}
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets ) {
|
||||
super(loc,reads,offsets);
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets) {
|
||||
super(loc, reads, offsets);
|
||||
}
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, int offset ) {
|
||||
super(loc,reads,offset);
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, int offset) {
|
||||
super(loc, reads, offset);
|
||||
}
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<PileupElement> pileupElements) {
|
||||
super(loc,pileupElements);
|
||||
super(loc, pileupElements);
|
||||
}
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,ReadBackedPileupImpl> pileupElementsBySample) {
|
||||
super(loc,pileupElementsBySample);
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, Map<String, ReadBackedPileupImpl> pileupElementsBySample) {
|
||||
super(loc, pileupElementsBySample);
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
*
|
||||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads) {
|
||||
super(loc,pileup,size,nDeletions,nMQ0Reads);
|
||||
super(loc, pileup, size, nDeletions, nMQ0Reads);
|
||||
}
|
||||
|
||||
protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker<PileupElement> tracker) {
|
||||
super(loc,tracker);
|
||||
super(loc, tracker);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ReadBackedPileupImpl createNewPileup(GenomeLoc loc, PileupElementTracker<PileupElement> tracker) {
|
||||
return new ReadBackedPileupImpl(loc,tracker);
|
||||
return new ReadBackedPileupImpl(loc, tracker);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
|
||||
return new PileupElement(read,offset);
|
||||
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
||||
return new PileupElement(read, offset, isDeletion);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -27,7 +27,7 @@ public class ArtificialSAMUtils {
|
|||
* @param chromosomeSize how large each chromosome is
|
||||
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
|
||||
*/
|
||||
public static void createArtificialBamFile( String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome ) {
|
||||
public static void createArtificialBamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
|
||||
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
|
||||
File outFile = new File(filename);
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ public class ArtificialSAMUtils {
|
|||
* @param chromosomeSize how large each chromosome is
|
||||
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
|
||||
*/
|
||||
public static void createArtificialSamFile( String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome ) {
|
||||
public static void createArtificialSamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
|
||||
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
|
||||
File outFile = new File(filename);
|
||||
|
||||
|
|
@ -72,16 +72,15 @@ public class ArtificialSAMUtils {
|
|||
* @param numberOfChromosomes the number of chromosomes to create
|
||||
* @param startingChromosome the starting number for the chromosome (most likely set to 1)
|
||||
* @param chromosomeSize the length of each chromosome
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static SAMFileHeader createArtificialSamHeader( int numberOfChromosomes, int startingChromosome, int chromosomeSize ) {
|
||||
public static SAMFileHeader createArtificialSamHeader(int numberOfChromosomes, int startingChromosome, int chromosomeSize) {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
|
||||
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
||||
// make up some sequence records
|
||||
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
|
||||
SAMSequenceRecord rec = new SAMSequenceRecord("chr" + ( x ), chromosomeSize /* size */);
|
||||
SAMSequenceRecord rec = new SAMSequenceRecord("chr" + (x), chromosomeSize /* size */);
|
||||
rec.setSequenceLength(chromosomeSize);
|
||||
dict.addSequence(rec);
|
||||
}
|
||||
|
|
@ -95,10 +94,9 @@ public class ArtificialSAMUtils {
|
|||
* @param header the header to set
|
||||
* @param readGroupID the read group ID tag
|
||||
* @param sampleName the sample name
|
||||
*
|
||||
* @return the adjusted SAMFileHeader
|
||||
*/
|
||||
public static SAMFileHeader createDefaultReadGroup( SAMFileHeader header, String readGroupID, String sampleName ) {
|
||||
public static SAMFileHeader createDefaultReadGroup(SAMFileHeader header, String readGroupID, String sampleName) {
|
||||
SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupID);
|
||||
rec.setSample(sampleName);
|
||||
List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
|
||||
|
|
@ -113,10 +111,9 @@ public class ArtificialSAMUtils {
|
|||
* @param header the header to set
|
||||
* @param readGroupIDs the read group ID tags
|
||||
* @param sampleNames the sample names
|
||||
*
|
||||
* @return the adjusted SAMFileHeader
|
||||
*/
|
||||
public static SAMFileHeader createEnumeratedReadGroups( SAMFileHeader header, List<String> readGroupIDs, List<String> sampleNames ) {
|
||||
public static SAMFileHeader createEnumeratedReadGroups(SAMFileHeader header, List<String> readGroupIDs, List<String> sampleNames) {
|
||||
if (readGroupIDs.size() != sampleNames.size()) {
|
||||
throw new ReviewedStingException("read group count and sample name count must be the same");
|
||||
}
|
||||
|
|
@ -137,18 +134,16 @@ public class ArtificialSAMUtils {
|
|||
/**
|
||||
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
|
||||
*
|
||||
*
|
||||
* @param header the SAM header to associate the read with
|
||||
* @param name the name of the read
|
||||
* @param refIndex the reference index, i.e. what chromosome to associate it with
|
||||
* @param alignmentStart where to start the alignment
|
||||
* @param length the length of the read
|
||||
*
|
||||
* @return the artificial read
|
||||
*/
|
||||
public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
|
||||
if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) ||
|
||||
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) )
|
||||
if ((refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) ||
|
||||
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START))
|
||||
throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart);
|
||||
GATKSAMRecord record = new GATKSAMRecord(header);
|
||||
record.setReadName(name);
|
||||
|
|
@ -183,10 +178,9 @@ public class ArtificialSAMUtils {
|
|||
* @param alignmentStart where to start the alignment
|
||||
* @param bases the sequence of the read
|
||||
* @param qual the qualities of the read
|
||||
*
|
||||
* @return the artificial read
|
||||
*/
|
||||
public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
|
||||
public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual) {
|
||||
if (bases.length != qual.length) {
|
||||
throw new ReviewedStingException("Passed in read string is different length then the quality array");
|
||||
}
|
||||
|
|
@ -210,10 +204,9 @@ public class ArtificialSAMUtils {
|
|||
* @param bases the sequence of the read
|
||||
* @param qual the qualities of the read
|
||||
* @param cigar the cigar string of the read
|
||||
*
|
||||
* @return the artificial read
|
||||
*/
|
||||
public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar ) {
|
||||
public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar) {
|
||||
GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases, qual);
|
||||
rec.setCigarString(cigar);
|
||||
return rec;
|
||||
|
|
@ -221,22 +214,21 @@ public class ArtificialSAMUtils {
|
|||
|
||||
/**
|
||||
* Create an artificial read with the following default parameters :
|
||||
* header:
|
||||
* numberOfChromosomes = 1
|
||||
* startingChromosome = 1
|
||||
* chromosomeSize = 1000000
|
||||
* read:
|
||||
* name = "default_read"
|
||||
* refIndex = 0
|
||||
* alignmentStart = 1
|
||||
*
|
||||
* @param bases the sequence of the read
|
||||
* @param qual the qualities of the read
|
||||
* @param cigar the cigar string of the read
|
||||
* header:
|
||||
* numberOfChromosomes = 1
|
||||
* startingChromosome = 1
|
||||
* chromosomeSize = 1000000
|
||||
* read:
|
||||
* name = "default_read"
|
||||
* refIndex = 0
|
||||
* alignmentStart = 1
|
||||
*
|
||||
* @param bases the sequence of the read
|
||||
* @param qual the qualities of the read
|
||||
* @param cigar the cigar string of the read
|
||||
* @return the artificial read
|
||||
*/
|
||||
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
|
||||
public static GATKSAMRecord createArtificialRead(byte[] bases, byte[] qual, String cigar) {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
||||
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 10000, bases, qual, cigar);
|
||||
}
|
||||
|
|
@ -253,7 +245,7 @@ public class ArtificialSAMUtils {
|
|||
right.setProperPairFlag(true);
|
||||
|
||||
left.setFirstOfPairFlag(leftIsFirst);
|
||||
right.setFirstOfPairFlag(! leftIsFirst);
|
||||
right.setFirstOfPairFlag(!leftIsFirst);
|
||||
|
||||
left.setReadNegativeStrandFlag(leftIsNegative);
|
||||
left.setMateNegativeStrandFlag(!leftIsNegative);
|
||||
|
|
@ -279,11 +271,10 @@ public class ArtificialSAMUtils {
|
|||
* @param startingChr the chromosome (reference ID) to start from
|
||||
* @param endingChr the id to end with
|
||||
* @param readCount the number of reads per chromosome
|
||||
*
|
||||
* @return StingSAMIterator representing the specified amount of fake data
|
||||
*/
|
||||
public static StingSAMIterator mappedReadIterator( int startingChr, int endingChr, int readCount ) {
|
||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
public static StingSAMIterator mappedReadIterator(int startingChr, int endingChr, int readCount) {
|
||||
SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
|
||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header);
|
||||
}
|
||||
|
|
@ -295,11 +286,10 @@ public class ArtificialSAMUtils {
|
|||
* @param endingChr the id to end with
|
||||
* @param readCount the number of reads per chromosome
|
||||
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
|
||||
*
|
||||
* @return StingSAMIterator representing the specified amount of fake data
|
||||
*/
|
||||
public static StingSAMIterator mappedAndUnmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
public static StingSAMIterator mappedAndUnmappedReadIterator(int startingChr, int endingChr, int readCount, int unmappedReadCount) {
|
||||
SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
|
||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
||||
}
|
||||
|
|
@ -310,11 +300,10 @@ public class ArtificialSAMUtils {
|
|||
* @param startingChr the chromosome (reference ID) to start from
|
||||
* @param endingChr the id to end with
|
||||
* @param readCount the number of reads per chromosome
|
||||
*
|
||||
* @return StingSAMIterator representing the specified amount of fake data
|
||||
*/
|
||||
public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount ) {
|
||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
public static ArtificialSAMQueryIterator queryReadIterator(int startingChr, int endingChr, int readCount) {
|
||||
SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
|
||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header);
|
||||
}
|
||||
|
|
@ -326,11 +315,10 @@ public class ArtificialSAMUtils {
|
|||
* @param endingChr the id to end with
|
||||
* @param readCount the number of reads per chromosome
|
||||
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
|
||||
*
|
||||
* @return StingSAMIterator representing the specified amount of fake data
|
||||
*/
|
||||
public static StingSAMIterator queryReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
public static StingSAMIterator queryReadIterator(int startingChr, int endingChr, int readCount, int unmappedReadCount) {
|
||||
SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
|
||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
||||
}
|
||||
|
|
@ -345,6 +333,7 @@ public class ArtificialSAMUtils {
|
|||
* reads created that have readLen bases. Pairs are sampled from a gaussian distribution with mean insert
|
||||
* size of insertSize and variation of insertSize / 10. The first read will be in the pileup, and the second
|
||||
* may be, depending on where this sampled insertSize puts it.
|
||||
*
|
||||
* @param header
|
||||
* @param loc
|
||||
* @param readLen
|
||||
|
|
@ -360,22 +349,22 @@ public class ArtificialSAMUtils {
|
|||
final int pos = loc.getStart();
|
||||
|
||||
final List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
for ( int i = 0; i < pileupSize / 2; i++ ) {
|
||||
for (int i = 0; i < pileupSize / 2; i++) {
|
||||
final String readName = "read" + i;
|
||||
final int leftStart = ranIntInclusive(ran, 1, pos);
|
||||
final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize);
|
||||
final int fragmentSize = (int) (ran.nextGaussian() * insertSizeVariation + insertSize);
|
||||
final int rightStart = leftStart + fragmentSize - readLen;
|
||||
|
||||
if ( rightStart <= 0 ) continue;
|
||||
if (rightStart <= 0) continue;
|
||||
|
||||
List<GATKSAMRecord> pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||
final GATKSAMRecord left = pair.get(0);
|
||||
final GATKSAMRecord right = pair.get(1);
|
||||
|
||||
pileupElements.add(new PileupElement(left, pos - leftStart));
|
||||
pileupElements.add(new PileupElement(left, pos - leftStart, false));
|
||||
|
||||
if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) {
|
||||
pileupElements.add(new PileupElement(right, pos - rightStart));
|
||||
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
|
||||
pileupElements.add(new PileupElement(right, pos - rightStart, false));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,20 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import org.apache.log4j.*;
|
||||
import org.apache.log4j.AppenderSkeleton;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.PatternLayout;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("d61c7055bd09024abb8902bde6bd3960"));
|
||||
Arrays.asList("653172b43b19003d9f7df6dab21f4b09"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -227,7 +227,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("b11df6587e4e16cb819d76a900446946"));
|
||||
Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -255,7 +255,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("59068bc8888ad5f08790946066d76602"));
|
||||
Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("fcd590a55f5fec2a9b7e628187d6b8a8"));
|
||||
Arrays.asList("877de5b0cc61dc54636062df6399b978"));
|
||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,12 +42,12 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testReducedReadPileupElement() {
|
||||
PileupElement readp = new PileupElement(read, 0);
|
||||
PileupElement reducedreadp = new PileupElement(reducedRead, 0);
|
||||
PileupElement readp = new PileupElement(read, 0, false);
|
||||
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false);
|
||||
|
||||
Assert.assertFalse(readp.isReducedRead());
|
||||
Assert.assertFalse(readp.getRead().isReducedRead());
|
||||
|
||||
Assert.assertTrue(reducedreadp.isReducedRead());
|
||||
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
|
||||
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
||||
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue