Updated, optimized REadBackedPileup. Updated test that was breaking the build -- it created a pileup from reads without bases...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-11-25 23:30:39 +00:00
parent 65da04ca85
commit 75b61a3663
3 changed files with 40 additions and 48 deletions

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -97,11 +98,7 @@ public class LocusIteratorByState extends LocusIterator {
return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
} }
//private CigarElement getCurElement() { return curElement; }
public CigarOperator getCurrentCigarOperator() { public CigarOperator getCurrentCigarOperator() {
//if ( curElement == null )
// System.out.printf("%s%n", this);
return curElement.getOperator(); return curElement.getOperator();
} }
@ -110,10 +107,6 @@ public class LocusIteratorByState extends LocusIterator {
} }
public CigarOperator stepForwardOnGenome() { public CigarOperator stepForwardOnGenome() {
//if ( cigarOffset == cigar.numCigarElements() )
// return null; // we are done
//if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1);
if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) { if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) {
cigarOffset++; cigarOffset++;
if ( cigarOffset < nCigarElements ) { if ( cigarOffset < nCigarElements ) {
@ -135,14 +128,10 @@ public class LocusIteratorByState extends LocusIterator {
break; break;
case S : // soft clip case S : // soft clip
case I : // insertion w.r.t. the reference case I : // insertion w.r.t. the reference
// readOffset++; done = true; break;
cigarElementCounter = curElement.getLength(); cigarElementCounter = curElement.getLength();
readOffset += curElement.getLength(); readOffset += curElement.getLength();
break; break;
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning) case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
// cigarElementCounter = curElement.getLength();
// genomeOffset += curElement.getLength();
// break;
case D : // deletion w.r.t. the reference case D : // deletion w.r.t. the reference
genomeOffset++; genomeOffset++;
done = true; done = true;
@ -155,8 +144,6 @@ public class LocusIteratorByState extends LocusIterator {
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator()); default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
} }
//if (DEBUG2) System.out.printf("stepForwardOnGenome3: done=%b curElement=%s, counter=%d, len=%d, offset=%d%n",
// done, curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1, getReadOffset());
return done ? curElement.getOperator() : stepForwardOnGenome(); return done ? curElement.getOperator() : stepForwardOnGenome();
} }
} }
@ -214,37 +201,32 @@ public class LocusIteratorByState extends LocusIterator {
// //
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
public AlignmentContext next() { public AlignmentContext next() {
//if (DEBUG) {
// logger.debug("in Next:");
// printState();
//}
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
// keep iterating forward until we encounter a reference position that has something "real" hanging over it // keep iterating forward until we encounter a reference position that has something "real" hanging over it
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true) // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
while(true) { while(true) {
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
collectPendingReads(readInfo.getMaxReadsAtLocus()); collectPendingReads(readInfo.getMaxReadsAtLocus());
int size = 0;
int nDeletions = 0;
// todo -- performance problem -- should be lazy, really // todo -- performance problem -- should be lazy, really
for ( SAMRecordState state : readStates ) { for ( SAMRecordState state : readStates ) {
size++;
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset()); PileupElement p = new PileupElement(state.getRead(), state.getReadOffset());
pile.add(new PileupElement(state.getRead(), state.getReadOffset())); pile.add(p);
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
pile.add(new PileupElement(state.getRead(), -1)); pile.add(new PileupElement(state.getRead(), -1));
nDeletions++;
} }
} }
GenomeLoc loc = getLocation(); GenomeLoc loc = getLocation();
updateReadStates(); // critical - must be called after we get the current state offsets and location updateReadStates(); // critical - must be called after we get the current state offsets and location
//if (DEBUG) {
// logger.debug("DONE WITH NEXT, updating read states, current state is:");
// printState();
//}
// if we got reads with non-D/N over the current position, we are done // if we got reads with non-D/N over the current position, we are done
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile)); if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions));
} }
} }

View File

@ -23,7 +23,6 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
private int size = 0; // cached value of the size of the pileup private int size = 0; // cached value of the size of the pileup
private int nDeletions = 0; // cached value of the number of deletions private int nDeletions = 0; // cached value of the number of deletions
private int[] counts = new int[4]; // cached value of counts
/** /**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding * Create a new version of a read backed pileup at loc, using the reads and their corresponding
@ -56,6 +55,22 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
calculatedCachedData(); calculatedCachedData();
} }
/**
* Optimization of above constructor where all of the cached data is provided
* @param loc
* @param pileup
*/
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
this.loc = loc;
this.pileup = pileup;
this.size = size;
this.nDeletions = nDeletions;
}
/** /**
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
@ -64,17 +79,11 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
private void calculatedCachedData() { private void calculatedCachedData() {
size = 0; size = 0;
nDeletions = 0; nDeletions = 0;
counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0;
for ( PileupElement p : this ) { for ( PileupElement p : this ) {
size++; size++;
if ( p.isDeletion() ) { if ( p.isDeletion() ) {
nDeletions++; nDeletions++;
} else {
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
if (index == -1)
continue;
counts[index]++;
} }
} }
} }
@ -244,19 +253,17 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* @return * @return
*/ */
public int[] getBaseCounts() { public int[] getBaseCounts() {
int[] counts = new int[4];
for ( PileupElement pile : this ) {
// skip deletion sites
if ( ! pile.isDeletion() ) {
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
if (index != -1)
counts[index]++;
}
}
return counts; return counts;
// int[] counts = new int[4];
// for ( PileupElement pile : this ) {
// // skip deletion sites
// if ( ! pile.isDeletion() ) {
// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
// if (index == -1)
// continue;
// counts[index]++;
// }
// }
//
// return counts;
} }
/** /**

View File

@ -279,8 +279,11 @@ public abstract class LocusViewTemplate extends BaseTest {
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart); record.setAlignmentStart(alignmentStart);
Cigar cigar = new Cigar(); Cigar cigar = new Cigar();
cigar.add(new CigarElement(alignmentEnd - alignmentStart + 1, CigarOperator.M)); int len = alignmentEnd - alignmentStart + 1;
cigar.add(new CigarElement(len, CigarOperator.M));
record.setCigar(cigar); record.setCigar(cigar);
record.setReadBases(new byte[len]);
record.setBaseQualities(new byte[len]);
return record; return record;
} }