Updated, optimized REadBackedPileup. Updated test that was breaking the build -- it created a pileup from reads without bases...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
65da04ca85
commit
75b61a3663
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
@ -97,11 +98,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||
}
|
||||
|
||||
//private CigarElement getCurElement() { return curElement; }
|
||||
|
||||
public CigarOperator getCurrentCigarOperator() {
|
||||
//if ( curElement == null )
|
||||
// System.out.printf("%s%n", this);
|
||||
return curElement.getOperator();
|
||||
}
|
||||
|
||||
|
|
@ -110,10 +107,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
public CigarOperator stepForwardOnGenome() {
|
||||
//if ( cigarOffset == cigar.numCigarElements() )
|
||||
// return null; // we are done
|
||||
|
||||
//if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1);
|
||||
if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) {
|
||||
cigarOffset++;
|
||||
if ( cigarOffset < nCigarElements ) {
|
||||
|
|
@ -135,14 +128,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
break;
|
||||
case S : // soft clip
|
||||
case I : // insertion w.r.t. the reference
|
||||
// readOffset++; done = true; break;
|
||||
cigarElementCounter = curElement.getLength();
|
||||
readOffset += curElement.getLength();
|
||||
break;
|
||||
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
// cigarElementCounter = curElement.getLength();
|
||||
// genomeOffset += curElement.getLength();
|
||||
// break;
|
||||
case D : // deletion w.r.t. the reference
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
|
|
@ -155,8 +144,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
//if (DEBUG2) System.out.printf("stepForwardOnGenome3: done=%b curElement=%s, counter=%d, len=%d, offset=%d%n",
|
||||
// done, curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1, getReadOffset());
|
||||
return done ? curElement.getOperator() : stepForwardOnGenome();
|
||||
}
|
||||
}
|
||||
|
|
@ -214,37 +201,32 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
public AlignmentContext next() {
|
||||
//if (DEBUG) {
|
||||
// logger.debug("in Next:");
|
||||
// printState();
|
||||
//}
|
||||
|
||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||
|
||||
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
||||
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
||||
while(true) {
|
||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||
|
||||
int size = 0;
|
||||
int nDeletions = 0;
|
||||
|
||||
// todo -- performance problem -- should be lazy, really
|
||||
for ( SAMRecordState state : readStates ) {
|
||||
size++;
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||
PileupElement p = new PileupElement(state.getRead(), state.getReadOffset());
|
||||
pile.add(p);
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
pile.add(new PileupElement(state.getRead(), -1));
|
||||
nDeletions++;
|
||||
}
|
||||
}
|
||||
GenomeLoc loc = getLocation();
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
|
||||
//if (DEBUG) {
|
||||
// logger.debug("DONE WITH NEXT, updating read states, current state is:");
|
||||
// printState();
|
||||
//}
|
||||
// if we got reads with non-D/N over the current position, we are done
|
||||
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile));
|
||||
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int[] counts = new int[4]; // cached value of counts
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
|
|
@ -56,6 +55,22 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
calculatedCachedData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
this.size = size;
|
||||
this.nDeletions = nDeletions;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||
|
|
@ -64,17 +79,11 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
private void calculatedCachedData() {
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0;
|
||||
|
||||
for ( PileupElement p : this ) {
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
nDeletions++;
|
||||
} else {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
|
||||
if (index == -1)
|
||||
continue;
|
||||
counts[index]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -244,19 +253,17 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
* @return
|
||||
*/
|
||||
public int[] getBaseCounts() {
|
||||
int[] counts = new int[4];
|
||||
for ( PileupElement pile : this ) {
|
||||
// skip deletion sites
|
||||
if ( ! pile.isDeletion() ) {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||
if (index != -1)
|
||||
counts[index]++;
|
||||
}
|
||||
}
|
||||
|
||||
return counts;
|
||||
// int[] counts = new int[4];
|
||||
// for ( PileupElement pile : this ) {
|
||||
// // skip deletion sites
|
||||
// if ( ! pile.isDeletion() ) {
|
||||
// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||
// if (index == -1)
|
||||
// continue;
|
||||
// counts[index]++;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return counts;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -279,8 +279,11 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
|
||||
record.setAlignmentStart(alignmentStart);
|
||||
Cigar cigar = new Cigar();
|
||||
cigar.add(new CigarElement(alignmentEnd - alignmentStart + 1, CigarOperator.M));
|
||||
int len = alignmentEnd - alignmentStart + 1;
|
||||
cigar.add(new CigarElement(len, CigarOperator.M));
|
||||
record.setCigar(cigar);
|
||||
record.setReadBases(new byte[len]);
|
||||
record.setBaseQualities(new byte[len]);
|
||||
return record;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue