Updated, optimized REadBackedPileup. Updated test that was breaking the build -- it created a pileup from reads without bases...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
65da04ca85
commit
75b61a3663
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
|
@ -97,11 +98,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
//private CigarElement getCurElement() { return curElement; }
|
|
||||||
|
|
||||||
public CigarOperator getCurrentCigarOperator() {
|
public CigarOperator getCurrentCigarOperator() {
|
||||||
//if ( curElement == null )
|
|
||||||
// System.out.printf("%s%n", this);
|
|
||||||
return curElement.getOperator();
|
return curElement.getOperator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -110,10 +107,6 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public CigarOperator stepForwardOnGenome() {
|
public CigarOperator stepForwardOnGenome() {
|
||||||
//if ( cigarOffset == cigar.numCigarElements() )
|
|
||||||
// return null; // we are done
|
|
||||||
|
|
||||||
//if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1);
|
|
||||||
if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) {
|
if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) {
|
||||||
cigarOffset++;
|
cigarOffset++;
|
||||||
if ( cigarOffset < nCigarElements ) {
|
if ( cigarOffset < nCigarElements ) {
|
||||||
|
|
@ -135,14 +128,10 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
break;
|
break;
|
||||||
case S : // soft clip
|
case S : // soft clip
|
||||||
case I : // insertion w.r.t. the reference
|
case I : // insertion w.r.t. the reference
|
||||||
// readOffset++; done = true; break;
|
|
||||||
cigarElementCounter = curElement.getLength();
|
cigarElementCounter = curElement.getLength();
|
||||||
readOffset += curElement.getLength();
|
readOffset += curElement.getLength();
|
||||||
break;
|
break;
|
||||||
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||||
// cigarElementCounter = curElement.getLength();
|
|
||||||
// genomeOffset += curElement.getLength();
|
|
||||||
// break;
|
|
||||||
case D : // deletion w.r.t. the reference
|
case D : // deletion w.r.t. the reference
|
||||||
genomeOffset++;
|
genomeOffset++;
|
||||||
done = true;
|
done = true;
|
||||||
|
|
@ -155,8 +144,6 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||||
}
|
}
|
||||||
|
|
||||||
//if (DEBUG2) System.out.printf("stepForwardOnGenome3: done=%b curElement=%s, counter=%d, len=%d, offset=%d%n",
|
|
||||||
// done, curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1, getReadOffset());
|
|
||||||
return done ? curElement.getOperator() : stepForwardOnGenome();
|
return done ? curElement.getOperator() : stepForwardOnGenome();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -214,37 +201,32 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
public AlignmentContext next() {
|
public AlignmentContext next() {
|
||||||
//if (DEBUG) {
|
|
||||||
// logger.debug("in Next:");
|
|
||||||
// printState();
|
|
||||||
//}
|
|
||||||
|
|
||||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
|
||||||
|
|
||||||
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
||||||
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
||||||
while(true) {
|
while(true) {
|
||||||
|
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||||
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||||
|
|
||||||
|
int size = 0;
|
||||||
|
int nDeletions = 0;
|
||||||
|
|
||||||
// todo -- performance problem -- should be lazy, really
|
// todo -- performance problem -- should be lazy, really
|
||||||
for ( SAMRecordState state : readStates ) {
|
for ( SAMRecordState state : readStates ) {
|
||||||
|
size++;
|
||||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
PileupElement p = new PileupElement(state.getRead(), state.getReadOffset());
|
||||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
pile.add(p);
|
||||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
pile.add(new PileupElement(state.getRead(), -1));
|
pile.add(new PileupElement(state.getRead(), -1));
|
||||||
|
nDeletions++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GenomeLoc loc = getLocation();
|
GenomeLoc loc = getLocation();
|
||||||
|
|
||||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||||
|
|
||||||
//if (DEBUG) {
|
|
||||||
// logger.debug("DONE WITH NEXT, updating read states, current state is:");
|
|
||||||
// printState();
|
|
||||||
//}
|
|
||||||
// if we got reads with non-D/N over the current position, we are done
|
// if we got reads with non-D/N over the current position, we are done
|
||||||
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile));
|
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,6 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
|
|
||||||
private int size = 0; // cached value of the size of the pileup
|
private int size = 0; // cached value of the size of the pileup
|
||||||
private int nDeletions = 0; // cached value of the number of deletions
|
private int nDeletions = 0; // cached value of the number of deletions
|
||||||
private int[] counts = new int[4]; // cached value of counts
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||||
|
|
@ -56,6 +55,22 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
calculatedCachedData();
|
calculatedCachedData();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optimization of above constructor where all of the cached data is provided
|
||||||
|
* @param loc
|
||||||
|
* @param pileup
|
||||||
|
*/
|
||||||
|
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions ) {
|
||||||
|
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
||||||
|
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
||||||
|
|
||||||
|
this.loc = loc;
|
||||||
|
this.pileup = pileup;
|
||||||
|
this.size = size;
|
||||||
|
this.nDeletions = nDeletions;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||||
|
|
@ -64,17 +79,11 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
private void calculatedCachedData() {
|
private void calculatedCachedData() {
|
||||||
size = 0;
|
size = 0;
|
||||||
nDeletions = 0;
|
nDeletions = 0;
|
||||||
counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0;
|
|
||||||
|
|
||||||
for ( PileupElement p : this ) {
|
for ( PileupElement p : this ) {
|
||||||
size++;
|
size++;
|
||||||
if ( p.isDeletion() ) {
|
if ( p.isDeletion() ) {
|
||||||
nDeletions++;
|
nDeletions++;
|
||||||
} else {
|
|
||||||
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
|
|
||||||
if (index == -1)
|
|
||||||
continue;
|
|
||||||
counts[index]++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -244,19 +253,17 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public int[] getBaseCounts() {
|
public int[] getBaseCounts() {
|
||||||
|
int[] counts = new int[4];
|
||||||
|
for ( PileupElement pile : this ) {
|
||||||
|
// skip deletion sites
|
||||||
|
if ( ! pile.isDeletion() ) {
|
||||||
|
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||||
|
if (index != -1)
|
||||||
|
counts[index]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return counts;
|
return counts;
|
||||||
// int[] counts = new int[4];
|
|
||||||
// for ( PileupElement pile : this ) {
|
|
||||||
// // skip deletion sites
|
|
||||||
// if ( ! pile.isDeletion() ) {
|
|
||||||
// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
|
||||||
// if (index == -1)
|
|
||||||
// continue;
|
|
||||||
// counts[index]++;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return counts;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -279,8 +279,11 @@ public abstract class LocusViewTemplate extends BaseTest {
|
||||||
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
|
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
|
||||||
record.setAlignmentStart(alignmentStart);
|
record.setAlignmentStart(alignmentStart);
|
||||||
Cigar cigar = new Cigar();
|
Cigar cigar = new Cigar();
|
||||||
cigar.add(new CigarElement(alignmentEnd - alignmentStart + 1, CigarOperator.M));
|
int len = alignmentEnd - alignmentStart + 1;
|
||||||
|
cigar.add(new CigarElement(len, CigarOperator.M));
|
||||||
record.setCigar(cigar);
|
record.setCigar(cigar);
|
||||||
|
record.setReadBases(new byte[len]);
|
||||||
|
record.setBaseQualities(new byte[len]);
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue