Partial flattening of the hanger data structure. The hanger data structure is
not currently as flat as it could / should be, but its speed is already comparable to that of the reference implementation. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3512 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8f9e3e8ad7
commit
84563b37e5
|
|
@ -521,7 +521,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
private final int targetCoverage;
|
private final int targetCoverage;
|
||||||
private final int maxReadsAtLocus;
|
private final int maxReadsAtLocus;
|
||||||
|
|
||||||
private final Deque<Map<String,List<SAMRecordState>>> readStatesByAlignmentStart;
|
private final Map<String,Deque<List<SAMRecordState>>> readStatesBySample;
|
||||||
private int totalReadStatesInHanger = 0;
|
private int totalReadStatesInHanger = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -547,35 +547,40 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
this.targetCoverage = Integer.MAX_VALUE;
|
this.targetCoverage = Integer.MAX_VALUE;
|
||||||
}
|
}
|
||||||
this.maxReadsAtLocus = maxReadsAtLocus;
|
this.maxReadsAtLocus = maxReadsAtLocus;
|
||||||
if(downsamplingMethod.type == DownsampleType.NONE)
|
this.readStatesBySample = new HashMap<String,Deque<List<SAMRecordState>>>();
|
||||||
|
|
||||||
|
if(downsamplingMethod.type == DownsampleType.NONE) {
|
||||||
aggregatorsBySampleName.put(null,new ArrayList<SAMRecord>());
|
aggregatorsBySampleName.put(null,new ArrayList<SAMRecord>());
|
||||||
else if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR)
|
readStatesBySample.put(null,new LinkedList<List<SAMRecordState>>());
|
||||||
aggregatorsBySampleName.put(null,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
}
|
||||||
else {
|
else if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
||||||
for(String sampleName: sampleNames)
|
aggregatorsBySampleName.put(null,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
||||||
aggregatorsBySampleName.put(sampleName,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
readStatesBySample.put(null,new LinkedList<List<SAMRecordState>>());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for(String sampleName: sampleNames) {
|
||||||
|
aggregatorsBySampleName.put(sampleName,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
||||||
|
readStatesBySample.put(sampleName,new LinkedList<List<SAMRecordState>>());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.readStatesByAlignmentStart = new LinkedList<Map<String,List<SAMRecordState>>>();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<SAMRecordState> iterator() {
|
public Iterator<SAMRecordState> iterator() {
|
||||||
return new Iterator<SAMRecordState>() {
|
return new Iterator<SAMRecordState>() {
|
||||||
private Iterator<Map<String,List<SAMRecordState>>> alignmentStartIterator;
|
private final Iterator<Iterator<List<SAMRecordState>>> sampleIterators;
|
||||||
private Map<String,List<SAMRecordState>> currentAlignmentStart;
|
|
||||||
|
|
||||||
private Iterator<List<SAMRecordState>> sampleIterator;
|
private Iterator<List<SAMRecordState>> sampleIterator;
|
||||||
private List<SAMRecordState> currentSample;
|
private List<SAMRecordState> currentAlignmentStart;
|
||||||
|
private Iterator<SAMRecordState> alignmentStartIterator;
|
||||||
private Iterator<SAMRecordState> readStateIterator;
|
|
||||||
private SAMRecordState nextReadState;
|
private SAMRecordState nextReadState;
|
||||||
|
|
||||||
private int readsInHanger = totalReadStatesInHanger;
|
private int readsInHanger = totalReadStatesInHanger;
|
||||||
|
|
||||||
{
|
{
|
||||||
alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
List<Iterator<List<SAMRecordState>>> sampleIteratorList = new LinkedList<Iterator<List<SAMRecordState>>>();
|
||||||
currentAlignmentStart = alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null;
|
for(Deque<List<SAMRecordState>> hanger: readStatesBySample.values())
|
||||||
sampleIterator = currentAlignmentStart!=null ? currentAlignmentStart.values().iterator() : null;
|
sampleIteratorList.add(hanger.iterator());
|
||||||
currentSample = sampleIterator!=null && sampleIterator.hasNext() ? sampleIterator.next() : null;
|
sampleIterators = sampleIteratorList.iterator();
|
||||||
readStateIterator = currentSample!=null ? currentSample.iterator() : null;
|
advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
|
|
@ -589,49 +594,50 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
return nextReadState;
|
return nextReadState;
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
|
readsInHanger--;
|
||||||
nextReadState = null;
|
nextReadState = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void remove() {
|
public void remove() {
|
||||||
if(readStateIterator == null)
|
if(alignmentStartIterator == null)
|
||||||
throw new StingException("Attempted to remove read, but no previous read was found.");
|
throw new StingException("Cannot remove read -- iterator is in an invalid state.");
|
||||||
readStateIterator.remove();
|
alignmentStartIterator.remove();
|
||||||
if(currentSample.isEmpty()) sampleIterator.remove();
|
if(currentAlignmentStart.isEmpty())
|
||||||
if(currentAlignmentStart.isEmpty()) alignmentStartIterator.remove();
|
sampleIterator.remove();
|
||||||
totalReadStatesInHanger--;
|
totalReadStatesInHanger--;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void advance() {
|
private void advance() {
|
||||||
nextReadState = null;
|
// nextReadState != null indicates that we haven't returned this value from the next() method yet.
|
||||||
if(readStateIterator!=null && readStateIterator.hasNext())
|
if(nextReadState != null)
|
||||||
nextReadState = readStateIterator.next();
|
return;
|
||||||
else if(sampleIterator!=null && sampleIterator.hasNext()) {
|
if(alignmentStartIterator!=null&&alignmentStartIterator.hasNext()) {
|
||||||
currentSample = sampleIterator.next();
|
nextReadState = alignmentStartIterator.next();
|
||||||
readStateIterator = currentSample.iterator();
|
|
||||||
nextReadState = readStateIterator.hasNext() ? readStateIterator.next() : null;
|
|
||||||
}
|
}
|
||||||
else if(alignmentStartIterator!=null && alignmentStartIterator.hasNext()) {
|
else if(sampleIterator!=null&&sampleIterator.hasNext()) {
|
||||||
currentAlignmentStart = alignmentStartIterator.next();
|
currentAlignmentStart = sampleIterator.next();
|
||||||
sampleIterator = currentAlignmentStart != null ? currentAlignmentStart.values().iterator() : null;
|
alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null;
|
||||||
currentSample = (sampleIterator!=null && sampleIterator.hasNext()) ? sampleIterator.next() : null;
|
nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null;
|
||||||
readStateIterator = currentSample!=null ? currentSample.iterator() : null;
|
}
|
||||||
nextReadState = (readStateIterator!=null && readStateIterator.hasNext()) ? readStateIterator.next() : null;
|
else if(sampleIterators.hasNext()) {
|
||||||
|
sampleIterator = sampleIterators.next();
|
||||||
|
currentAlignmentStart = sampleIterator!=null&&sampleIterator.hasNext() ? sampleIterator.next() : null;
|
||||||
|
alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null;
|
||||||
|
nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(nextReadState != null) readsInHanger--;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return readStatesByAlignmentStart.isEmpty();
|
return readStatesBySample.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int size() {
|
public int size() {
|
||||||
int size = 0;
|
int size = 0;
|
||||||
for(Map<String,List<SAMRecordState>> readStatesBySample: readStatesByAlignmentStart) {
|
for(Deque<List<SAMRecordState>> readStatesByAlignmentStart: readStatesBySample.values()) {
|
||||||
for(Collection<SAMRecordState> readStates: readStatesBySample.values())
|
for(Collection<SAMRecordState> readStates: readStatesByAlignmentStart)
|
||||||
size += readStates.size();
|
size += readStates.size();
|
||||||
}
|
}
|
||||||
return size;
|
return size;
|
||||||
|
|
@ -642,7 +648,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return !readStatesByAlignmentStart.isEmpty() || iterator.hasNext();
|
return totalReadStatesInHanger > 0 || iterator.hasNext();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void collectPendingReads() {
|
public void collectPendingReads() {
|
||||||
|
|
@ -665,7 +671,6 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String,List<SAMRecordState>> culledReadStatesBySample = new HashMap<String,List<SAMRecordState>>();
|
|
||||||
int readStatesInHangerEntry = 0;
|
int readStatesInHangerEntry = 0;
|
||||||
|
|
||||||
for(Map.Entry<String,Collection<SAMRecord>> entry: aggregatorsBySampleName.entrySet()) {
|
for(Map.Entry<String,Collection<SAMRecord>> entry: aggregatorsBySampleName.entrySet()) {
|
||||||
|
|
@ -674,6 +679,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
|
|
||||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator);
|
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator);
|
||||||
aggregator.clear();
|
aggregator.clear();
|
||||||
|
|
||||||
|
Deque<List<SAMRecordState>> hanger = readStatesBySample.get(sampleName);
|
||||||
int readsInHanger = countReadsInHanger(sampleName);
|
int readsInHanger = countReadsInHanger(sampleName);
|
||||||
|
|
||||||
if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
||||||
|
|
@ -683,15 +690,15 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
readLimit = maxReadsAtLocus-totalReadStatesInHanger;
|
readLimit = maxReadsAtLocus-totalReadStatesInHanger;
|
||||||
mrlViolation = true;
|
mrlViolation = true;
|
||||||
}
|
}
|
||||||
readStatesInHangerEntry += addReadsToHanger(culledReadStatesBySample,sampleName,newReads,readLimit,mrlViolation);
|
readStatesInHangerEntry += addReadsToHanger(hanger,newReads,readLimit,mrlViolation);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Iterator<Map<String,List<SAMRecordState>>> backIterator = readStatesByAlignmentStart.descendingIterator();
|
Iterator<List<SAMRecordState>> backIterator = hanger.descendingIterator();
|
||||||
boolean readPruned = true;
|
boolean readPruned = true;
|
||||||
while(readsInHanger+newReads.size()>targetCoverage && readPruned) {
|
while(readsInHanger+newReads.size()>targetCoverage && readPruned) {
|
||||||
readPruned = false;
|
readPruned = false;
|
||||||
while(readsInHanger+newReads.size()>targetCoverage && backIterator.hasNext()) {
|
while(readsInHanger+newReads.size()>targetCoverage && backIterator.hasNext()) {
|
||||||
List<SAMRecordState> readsAtLocus = backIterator.next().get(sampleName);
|
List<SAMRecordState> readsAtLocus = backIterator.next();
|
||||||
if(readsAtLocus.size() > 1) {
|
if(readsAtLocus.size() > 1) {
|
||||||
readsAtLocus.remove(downsampleRandomizer.nextInt(readsAtLocus.size()));
|
readsAtLocus.remove(downsampleRandomizer.nextInt(readsAtLocus.size()));
|
||||||
readPruned = true;
|
readPruned = true;
|
||||||
|
|
@ -701,16 +708,13 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
if(readsInHanger == targetCoverage) {
|
if(readsInHanger == targetCoverage) {
|
||||||
Collection<SAMRecordState> firstHangerForSample = readStatesByAlignmentStart.getFirst().get(sampleName);
|
List<SAMRecordState> readsInFirstHanger = hanger.remove();
|
||||||
readsInHanger -= firstHangerForSample.size();
|
readsInHanger -= readsInFirstHanger.size();
|
||||||
firstHangerForSample.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
readStatesInHangerEntry += addReadsToHanger(culledReadStatesBySample,sampleName,newReads,targetCoverage-readsInHanger,false);
|
readStatesInHangerEntry += addReadsToHanger(hanger,newReads,targetCoverage-readsInHanger,false);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if(!culledReadStatesBySample.isEmpty()) {
|
|
||||||
readStatesByAlignmentStart.add(culledReadStatesBySample);
|
|
||||||
totalReadStatesInHanger += readStatesInHangerEntry;
|
totalReadStatesInHanger += readStatesInHangerEntry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -724,9 +728,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
|
|
||||||
private int countReadsInHanger(final String sampleName) {
|
private int countReadsInHanger(final String sampleName) {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for(Map<String,List<SAMRecordState>> hangerEntry: readStatesByAlignmentStart) {
|
for(List<SAMRecordState> hangerEntry: readStatesBySample.get(sampleName)) {
|
||||||
if(sampleName == null && hangerEntry.containsKey(sampleName))
|
count += hangerEntry.size();
|
||||||
count += hangerEntry.get(sampleName).size();
|
|
||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
@ -734,12 +737,11 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
/**
|
/**
|
||||||
* Add reads with the given sample name to the given hanger entry.
|
* Add reads with the given sample name to the given hanger entry.
|
||||||
* @param newHangerEntry The hanger entry to add.
|
* @param newHangerEntry The hanger entry to add.
|
||||||
* @param sampleName Sample name of the given reads. Should match the entry in each read's read group.
|
|
||||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||||
* @param maxReads Maximum number of reads to add.
|
* @param maxReads Maximum number of reads to add.
|
||||||
* @return Total number of reads added.
|
* @return Total number of reads added.
|
||||||
*/
|
*/
|
||||||
private int addReadsToHanger(final Map<String,List<SAMRecordState>> newHangerEntry, final String sampleName, final Collection<SAMRecord> reads, final int maxReads, boolean atMaxReadsAtLocusLimit) {
|
private int addReadsToHanger(final Deque<List<SAMRecordState>> newHangerEntry, final Collection<SAMRecord> reads, final int maxReads, boolean atMaxReadsAtLocusLimit) {
|
||||||
if(reads.isEmpty())
|
if(reads.isEmpty())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
|
@ -748,13 +750,13 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
// the farthest right a read extends
|
// the farthest right a read extends
|
||||||
Integer rightMostEnd = -1;
|
Integer rightMostEnd = -1;
|
||||||
|
|
||||||
List<SAMRecordState> readStatesBySample = new LinkedList<SAMRecordState>();
|
List<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
for(SAMRecord read: reads) {
|
for(SAMRecord read: reads) {
|
||||||
if(readCount <= maxReads) {
|
if(readCount <= maxReads) {
|
||||||
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
||||||
state.stepForwardOnGenome();
|
state.stepForwardOnGenome();
|
||||||
readStatesBySample.add(state);
|
readStates.add(state);
|
||||||
// TODO: What if we downsample the extended events away?
|
// TODO: What if we downsample the extended events away?
|
||||||
if (state.hadIndel()) hasExtendedEvents = true;
|
if (state.hadIndel()) hasExtendedEvents = true;
|
||||||
readCount++;
|
readCount++;
|
||||||
|
|
@ -765,7 +767,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
|
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
newHangerEntry.put(sampleName,readStatesBySample);
|
newHangerEntry.add(readStates);
|
||||||
|
|
||||||
if (location != null)
|
if (location != null)
|
||||||
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
|
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue