Updated, optimized REadBackedPileup. Updated test that was breaking the build -- it created a pileup from reads without bases...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-11-25 23:30:39 +00:00
parent 65da04ca85
commit 75b61a3663
3 changed files with 40 additions and 48 deletions

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -97,11 +98,7 @@ public class LocusIteratorByState extends LocusIterator {
return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
}
//private CigarElement getCurElement() { return curElement; }
public CigarOperator getCurrentCigarOperator() {
//if ( curElement == null )
// System.out.printf("%s%n", this);
return curElement.getOperator();
}
@ -110,10 +107,6 @@ public class LocusIteratorByState extends LocusIterator {
}
public CigarOperator stepForwardOnGenome() {
//if ( cigarOffset == cigar.numCigarElements() )
// return null; // we are done
//if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1);
if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) {
cigarOffset++;
if ( cigarOffset < nCigarElements ) {
@ -135,14 +128,10 @@ public class LocusIteratorByState extends LocusIterator {
break;
case S : // soft clip
case I : // insertion w.r.t. the reference
// readOffset++; done = true; break;
cigarElementCounter = curElement.getLength();
readOffset += curElement.getLength();
break;
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
// cigarElementCounter = curElement.getLength();
// genomeOffset += curElement.getLength();
// break;
case D : // deletion w.r.t. the reference
genomeOffset++;
done = true;
@ -155,8 +144,6 @@ public class LocusIteratorByState extends LocusIterator {
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
}
//if (DEBUG2) System.out.printf("stepForwardOnGenome3: done=%b curElement=%s, counter=%d, len=%d, offset=%d%n",
// done, curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1, getReadOffset());
return done ? curElement.getOperator() : stepForwardOnGenome();
}
}
@ -214,37 +201,32 @@ public class LocusIteratorByState extends LocusIterator {
//
// -----------------------------------------------------------------------------------------------------------------
public AlignmentContext next() {
//if (DEBUG) {
// logger.debug("in Next:");
// printState();
//}
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
while(true) {
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
collectPendingReads(readInfo.getMaxReadsAtLocus());
int size = 0;
int nDeletions = 0;
// todo -- performance problem -- should be lazy, really
for ( SAMRecordState state : readStates ) {
size++;
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
PileupElement p = new PileupElement(state.getRead(), state.getReadOffset());
pile.add(p);
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
pile.add(new PileupElement(state.getRead(), -1));
nDeletions++;
}
}
GenomeLoc loc = getLocation();
updateReadStates(); // critical - must be called after we get the current state offsets and location
//if (DEBUG) {
// logger.debug("DONE WITH NEXT, updating read states, current state is:");
// printState();
//}
// if we got reads with non-D/N over the current position, we are done
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile));
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions));
}
}

View File

@ -23,7 +23,6 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
private int size = 0; // cached value of the size of the pileup
private int nDeletions = 0; // cached value of the number of deletions
private int[] counts = new int[4]; // cached value of counts
/**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
@ -56,6 +55,22 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
calculatedCachedData();
}
/**
* Optimization of above constructor where all of the cached data is provided
* @param loc
* @param pileup
*/
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
this.loc = loc;
this.pileup = pileup;
this.size = size;
this.nDeletions = nDeletions;
}
/**
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
@ -64,17 +79,11 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
private void calculatedCachedData() {
size = 0;
nDeletions = 0;
counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0;
for ( PileupElement p : this ) {
size++;
if ( p.isDeletion() ) {
nDeletions++;
} else {
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
if (index == -1)
continue;
counts[index]++;
}
}
}
@ -244,19 +253,17 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* @return
*/
public int[] getBaseCounts() {
int[] counts = new int[4];
for ( PileupElement pile : this ) {
// skip deletion sites
if ( ! pile.isDeletion() ) {
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
if (index != -1)
counts[index]++;
}
}
return counts;
// int[] counts = new int[4];
// for ( PileupElement pile : this ) {
// // skip deletion sites
// if ( ! pile.isDeletion() ) {
// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
// if (index == -1)
// continue;
// counts[index]++;
// }
// }
//
// return counts;
}
/**

View File

@ -279,8 +279,11 @@ public abstract class LocusViewTemplate extends BaseTest {
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart);
Cigar cigar = new Cigar();
cigar.add(new CigarElement(alignmentEnd - alignmentStart + 1, CigarOperator.M));
int len = alignmentEnd - alignmentStart + 1;
cigar.add(new CigarElement(len, CigarOperator.M));
record.setCigar(cigar);
record.setReadBases(new byte[len]);
record.setBaseQualities(new byte[len]);
return record;
}