diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index e1aff00f3..fdecd3d5e 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -97,11 +98,7 @@ public class LocusIteratorByState extends LocusIterator { return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); } - //private CigarElement getCurElement() { return curElement; } - public CigarOperator getCurrentCigarOperator() { - //if ( curElement == null ) - // System.out.printf("%s%n", this); return curElement.getOperator(); } @@ -110,10 +107,6 @@ public class LocusIteratorByState extends LocusIterator { } public CigarOperator stepForwardOnGenome() { - //if ( cigarOffset == cigar.numCigarElements() ) - // return null; // we are done - - //if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1); if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) { cigarOffset++; if ( cigarOffset < nCigarElements ) { @@ -135,14 +128,10 @@ public class LocusIteratorByState extends LocusIterator { break; case S : // soft clip case I : // insertion w.r.t. the reference -// readOffset++; done = true; break; cigarElementCounter = curElement.getLength(); readOffset += curElement.getLength(); break; case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning) -// cigarElementCounter = curElement.getLength(); -// genomeOffset += curElement.getLength(); -// break; case D : // deletion w.r.t. the reference genomeOffset++; done = true; @@ -155,8 +144,6 @@ public class LocusIteratorByState extends LocusIterator { default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator()); } - //if (DEBUG2) System.out.printf("stepForwardOnGenome3: done=%b curElement=%s, counter=%d, len=%d, offset=%d%n", - // done, curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1, getReadOffset()); return done ? curElement.getOperator() : stepForwardOnGenome(); } } @@ -214,37 +201,32 @@ public class LocusIteratorByState extends LocusIterator { // // ----------------------------------------------------------------------------------------------------------------- public AlignmentContext next() { - //if (DEBUG) { - // logger.debug("in Next:"); - // printState(); - //} - - ArrayList pile = new ArrayList(readStates.size()); - // keep iterating forward until we encounter a reference position that has something "real" hanging over it // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true) while(true) { + ArrayList pile = new ArrayList(readStates.size()); collectPendingReads(readInfo.getMaxReadsAtLocus()); + int size = 0; + int nDeletions = 0; + // todo -- performance problem -- should be lazy, really for ( SAMRecordState state : readStates ) { + size++; if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { -// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset()); - pile.add(new PileupElement(state.getRead(), state.getReadOffset())); + PileupElement p = new PileupElement(state.getRead(), state.getReadOffset()); + pile.add(p); } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { pile.add(new PileupElement(state.getRead(), -1)); + nDeletions++; } } GenomeLoc loc = getLocation(); updateReadStates(); // critical - must be called after we get the current state offsets and location - //if (DEBUG) { - // logger.debug("DONE WITH NEXT, updating read states, current state is:"); - // printState(); - //} // if we got reads with non-D/N over the current position, we are done - if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile)); + if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions)); } } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 030612b7a..7fe8d0abc 100755 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -23,7 +23,6 @@ public class ReadBackedPileup implements Iterable { private int size = 0; // cached value of the size of the pileup private int nDeletions = 0; // cached value of the number of deletions - private int[] counts = new int[4]; // cached value of counts /** * Create a new version of a read backed pileup at loc, using the reads and their corresponding @@ -56,6 +55,22 @@ public class ReadBackedPileup implements Iterable { calculatedCachedData(); } + /** + * Optimization of above constructor where all of the cached data is provided + * @param loc + * @param pileup + */ + public ReadBackedPileup(GenomeLoc loc, ArrayList pileup, int size, int nDeletions ) { + if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2"); + if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2"); + + this.loc = loc; + this.pileup = pileup; + this.size = size; + this.nDeletions = nDeletions; + } + + /** * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling @@ -64,17 +79,11 @@ public class ReadBackedPileup implements Iterable { private void calculatedCachedData() { size = 0; nDeletions = 0; - counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0; for ( PileupElement p : this ) { size++; if ( p.isDeletion() ) { nDeletions++; - } else { - int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase()); - if (index == -1) - continue; - counts[index]++; } } } @@ -244,19 +253,17 @@ public class ReadBackedPileup implements Iterable { * @return */ public int[] getBaseCounts() { + int[] counts = new int[4]; + for ( PileupElement pile : this ) { + // skip deletion sites + if ( ! pile.isDeletion() ) { + int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase()); + if (index != -1) + counts[index]++; + } + } + return counts; -// int[] counts = new int[4]; -// for ( PileupElement pile : this ) { -// // skip deletion sites -// if ( ! pile.isDeletion() ) { -// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase()); -// if (index == -1) -// continue; -// counts[index]++; -// } -// } -// -// return counts; } /** diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 119dacf96..eaf671e47 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -279,8 +279,11 @@ public abstract class LocusViewTemplate extends BaseTest { record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); - cigar.add(new CigarElement(alignmentEnd - alignmentStart + 1, CigarOperator.M)); + int len = alignmentEnd - alignmentStart + 1; + cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); + record.setReadBases(new byte[len]); + record.setBaseQualities(new byte[len]); return record; }