Refactor PerSampleReadStates into a separate class

-- Per-sample state managers no longer update the total read-state count themselves; they now return delta counts, which the overall ReadStateManager applies to its own total
-- One step on the way to improving the underlying representation of the data in PerSampleReadStateManager
-- Make LocusIteratorByState final
This commit is contained in:
Mark DePristo 2013-01-13 13:02:17 -05:00
parent 5c2799554a
commit 5a5422e4f8
5 changed files with 214 additions and 149 deletions

View File

@ -65,7 +65,7 @@ import java.util.*;
* occurs, if requested. This allows users of LIBS to see both a ReadBackedPileup view of the data as well as
* a stream of unique, sorted reads
*/
public class LocusIteratorByState extends LocusIterator {
public final class LocusIteratorByState extends LocusIterator {
/**
* our log, which we want to capture anything from this class
*/
@ -233,9 +233,9 @@ public class LocusIteratorByState extends LocusIterator {
final GenomeLoc location = getLocation();
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
for (final Map.Entry<String, ReadStateManager.PerSampleReadStateManager> sampleStatePair : readStates ) {
for (final Map.Entry<String, PerSampleReadStateManager> sampleStatePair : readStates ) {
final String sample = sampleStatePair.getKey();
final ReadStateManager.PerSampleReadStateManager readState = sampleStatePair.getValue();
final PerSampleReadStateManager readState = sampleStatePair.getValue();
final Iterator<AlignmentStateMachine> iterator = readState.iterator();
final List<PileupElement> pile = new ArrayList<PileupElement>(readState.size());

View File

@ -0,0 +1,203 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.locusiterator;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarOperator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
/**
 * ReadStateManager for a single sample
 *
 * Stores the AlignmentStateMachines of one sample as a list of per-alignment-start lists and keeps
 * a running count of how many states it currently holds. The methods that add or advance states
 * return the resulting change in that count so the caller can maintain an overall total; removals
 * performed directly through {@link #iterator()} are only reflected in {@link #size()}.
 *
 * User: depristo
 * Date: 1/13/13
 * Time: 12:28 PM
 */
final class PerSampleReadStateManager implements Iterable<AlignmentStateMachine> {
    // log under this class's own name (previously logged as ReadStateManager, a copy-paste leftover)
    private static final Logger logger = Logger.getLogger(PerSampleReadStateManager.class);
    private static final boolean CAPTURE_DOWNSAMPLING_STATS = true;

    /** One inner list per group of states added via addStatesAtNextAlignmentStart */
    private List<LinkedList<AlignmentStateMachine>> readStatesByAlignmentStart = new LinkedList<LinkedList<AlignmentStateMachine>>();

    /** The downsampler applied after each addition, or null when downsampling is disabled */
    private final Downsampler<LinkedList<AlignmentStateMachine>> levelingDownsampler;

    /** Number of read states currently held by this manager */
    private int thisSampleReadStates = 0;

    /** Target coverage when downsampling, -1 when downsampling is disabled */
    private final int downsamplingTarget;

    // bookkeeping used only by captureDownsamplingStats()
    private int nSitesNeedingDownsampling = 0;
    private int nSites = 0;

    /**
     * Create a new, empty manager
     *
     * @param LIBSDownsamplingInfo says whether to downsample and, if so, to what coverage
     */
    public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
        this.downsamplingTarget = LIBSDownsamplingInfo.isPerformDownsampling() ? LIBSDownsamplingInfo.getToCoverage() : -1;
        this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
                ? new LevelingDownsampler<LinkedList<AlignmentStateMachine>, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage())
                : null;
    }

    /**
     * Add a group of states to this manager, downsampling afterwards if enabled
     *
     * Assumes it can just keep the states linked lists without making a copy
     *
     * @param states the new states to add to this manager
     * @return The change in the number of states, after including states and potentially downsampling.
     *         This is states.size() minus the number of items the downsampler reports as discarded;
     *         since the downsampler is given every list held by this manager, the code does not
     *         itself guarantee that the result is non-negative
     */
    @Requires("states != null")
    public int addStatesAtNextAlignmentStart(final LinkedList<AlignmentStateMachine> states) {
        if ( states.isEmpty() ) {
            return 0;
        }

        readStatesByAlignmentStart.add(states);
        int nStatesAdded = states.size();

        // count the new states before capturing stats, so that the coverage that is logged and
        // compared against the downsampling target includes the states just added
        thisSampleReadStates += nStatesAdded;

        if ( isDownsampling() ) {
            captureDownsamplingStats();
            levelingDownsampler.submit(readStatesByAlignmentStart);
            levelingDownsampler.signalEndOfInput();

            final int nDiscarded = levelingDownsampler.getNumberOfDiscardedItems();
            nStatesAdded -= nDiscarded;
            thisSampleReadStates -= nDiscarded;

            // use returned List directly rather than make a copy, for efficiency's sake
            readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
            levelingDownsampler.reset();
        }

        return nStatesAdded;
    }

    /**
     * @return true if this manager was created with downsampling enabled
     */
    private boolean isDownsampling() {
        return levelingDownsampler != null;
    }

    /**
     * @return the first state of the first list held by this manager, or null if no lists are held
     */
    private AlignmentStateMachine getFirst() {
        if ( readStatesByAlignmentStart.isEmpty() )
            return null;
        else
            return readStatesByAlignmentStart.get(0).getFirst();
    }

    /**
     * Update the per-site downsampling counters and log the current coverage, either when the
     * coverage exceeds the downsampling target or on every 10000th site
     *
     * Must be called while this manager holds at least one state, as it reads the position of the first one
     */
    @Requires("isDownsampling()")
    private void captureDownsamplingStats() {
        if ( CAPTURE_DOWNSAMPLING_STATS ) {
            nSites++;
            final int loc = getFirst().getGenomePosition();
            String message = "Pass through";
            final boolean downsampling = thisSampleReadStates > downsamplingTarget;
            if ( downsampling ) {
                nSitesNeedingDownsampling++;
                message = "Downsampling";
            }

            if ( downsampling || nSites % 10000 == 0 )
                logger.info(String.format("%20s at %s: coverage=%d, max=%d, fraction of downsampled sites=%.2e",
                        message, loc, thisSampleReadStates, downsamplingTarget, (1.0 * nSitesNeedingDownsampling / nSites)));
        }
    }

    /**
     * Is there at least one alignment for this sample in this manager?
     * @return false if there's at least one alignment, true otherwise
     */
    public boolean isEmpty() {
        return readStatesByAlignmentStart.isEmpty();
    }

    /**
     * Look at the first state without removing it
     * @return the head of the first list held by this manager, or null if this manager is empty
     */
    public AlignmentStateMachine peek() {
        return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
    }

    /**
     * Get the number of read states currently in this manager
     * @return the number of read states
     */
    @Ensures("result >= 0")
    public int size() {
        return thisSampleReadStates;
    }

    /**
     * Advances all read states forward by one element, removing states that are
     * no longer aligned to the current position.
     * @return the number of states we've removed after advancing
     */
    public int updateReadStates() {
        int nRemoved = 0;
        final Iterator<AlignmentStateMachine> it = iterator();
        while ( it.hasNext() ) {
            final AlignmentStateMachine state = it.next();
            final CigarOperator op = state.stepForwardOnGenome();
            if ( op == null ) {
                // we discard the read only when we are past its end AND indel at the end of the read (if any) was
                // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
                // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
                it.remove();                                                // we've stepped off the end of the object
                nRemoved++;
            }
        }

        return nRemoved;
    }

    /**
     * Iterate over every state in this manager, list by list
     *
     * Removing through the returned iterator decrements size() and drops a list once its last state
     * is removed, but is not reported to any caller-maintained total.
     *
     * @return an iterator over all states held by this manager
     */
    // todo -- reimplement
    @Override
    public Iterator<AlignmentStateMachine> iterator() {
        return new Iterator<AlignmentStateMachine>() {
            private final Iterator<LinkedList<AlignmentStateMachine>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
            private LinkedList<AlignmentStateMachine> currentPositionReadStates;
            private Iterator<AlignmentStateMachine> currentPositionReadStatesIterator;

            @Override
            public boolean hasNext() {
                return alignmentStartIterator.hasNext() ||
                        (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
            }

            @Override
            public AlignmentStateMachine next() {
                // move on to the next list once the current one is exhausted (or on the first call)
                if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
                    currentPositionReadStates = alignmentStartIterator.next();
                    currentPositionReadStatesIterator = currentPositionReadStates.iterator();
                }

                return currentPositionReadStatesIterator.next();
            }

            @Override
            public void remove() {
                // honor the Iterator contract instead of failing with a NullPointerException
                if ( currentPositionReadStatesIterator == null )
                    throw new IllegalStateException("remove() called before next()");

                currentPositionReadStatesIterator.remove();
                thisSampleReadStates--;

                // never keep an empty list around: hasNext(), peek() and getFirst() rely on that
                if ( currentPositionReadStates.isEmpty() ) {
                    alignmentStartIterator.remove();
                }
            }
        };
    }
}

View File

@ -28,10 +28,7 @@ package org.broadinstitute.sting.utils.locusiterator;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.CigarOperator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
@ -50,9 +47,7 @@ import java.util.*;
* Date: 1/5/13
* Time: 2:02 PM
*/
final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateManager.PerSampleReadStateManager>> {
private final static Logger logger = Logger.getLogger(ReadStateManager.class);
private final static boolean CAPTURE_DOWNSAMPLING_STATS = true;
final class ReadStateManager implements Iterable<Map.Entry<String, PerSampleReadStateManager>> {
private final List<String> samples;
private final PeekableIterator<GATKSAMRecord> iterator;
private final SamplePartitioner<GATKSAMRecord> samplePartitioner;
@ -97,7 +92,7 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
* @return Iterator over sample + per sample read state manager pairs for this read state manager.
*/
@Override
public Iterator<Map.Entry<String, ReadStateManager.PerSampleReadStateManager>> iterator() {
public Iterator<Map.Entry<String, PerSampleReadStateManager>> iterator() {
return readStatesBySample.entrySet().iterator();
}
@ -142,7 +137,7 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
*/
public void updateReadStates() {
for (final PerSampleReadStateManager perSampleReadStateManager : readStatesBySample.values() ) {
perSampleReadStateManager.updateReadStates();
totalReadStates -= perSampleReadStateManager.updateReadStates();
}
}
@ -290,131 +285,6 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
newReadStates.add(state);
}
readStates.addStatesAtNextAlignmentStart(newReadStates);
}
// TODO -- refactor into separate class with pointer to ReadStateManager for updates to the total counts
protected final class PerSampleReadStateManager implements Iterable<AlignmentStateMachine> {
private List<LinkedList<AlignmentStateMachine>> readStatesByAlignmentStart = new LinkedList<LinkedList<AlignmentStateMachine>>();
private final Downsampler<LinkedList<AlignmentStateMachine>> levelingDownsampler;
private int thisSampleReadStates = 0;
private final int downsamplingTarget;
private int nSitesNeedingDownsampling = 0;
private int nSites = 0;
public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
this.downsamplingTarget = LIBSDownsamplingInfo.isPerformDownsampling() ? LIBSDownsamplingInfo.getToCoverage() : -1;
this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
? new LevelingDownsampler<LinkedList<AlignmentStateMachine>, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage())
: null;
}
/**
* Assumes it can just keep the states linked lists without making a copy
* @param states
*/
public void addStatesAtNextAlignmentStart(LinkedList<AlignmentStateMachine> states) {
if ( states.isEmpty() ) {
return;
}
readStatesByAlignmentStart.add(states);
thisSampleReadStates += states.size();
totalReadStates += states.size();
if ( isDownsampling() ) {
captureDownsamplingStats();
levelingDownsampler.submit(readStatesByAlignmentStart);
levelingDownsampler.signalEndOfInput();
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
// use returned List directly rather than make a copy, for efficiency's sake
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
levelingDownsampler.reset();
}
}
private boolean isDownsampling() {
return levelingDownsampler != null;
}
@Requires("isDownsampling()")
private void captureDownsamplingStats() {
if ( CAPTURE_DOWNSAMPLING_STATS ) {
nSites++;
final int loc = getFirst().getGenomePosition();
String message = "Pass through";
final boolean downsampling = thisSampleReadStates > downsamplingTarget;
if ( downsampling ) {
nSitesNeedingDownsampling++;
message = "Downsampling";
}
if ( downsampling || nSites % 10000 == 0 )
logger.info(String.format("%20s at %s: coverage=%d, max=%d, fraction of downsampled sites=%.2e",
message, loc, thisSampleReadStates, downsamplingTarget, (1.0 * nSitesNeedingDownsampling / nSites)));
}
}
public boolean isEmpty() {
return readStatesByAlignmentStart.isEmpty();
}
public AlignmentStateMachine peek() {
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
}
public int size() {
return thisSampleReadStates;
}
public void updateReadStates() {
final Iterator<AlignmentStateMachine> it = iterator();
while (it.hasNext()) {
final AlignmentStateMachine state = it.next();
final CigarOperator op = state.stepForwardOnGenome();
if (op == null) {
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
it.remove(); // we've stepped off the end of the object
}
}
}
public Iterator<AlignmentStateMachine> iterator() {
return new Iterator<AlignmentStateMachine>() {
private final Iterator<LinkedList<AlignmentStateMachine>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
private LinkedList<AlignmentStateMachine> currentPositionReadStates;
private Iterator<AlignmentStateMachine> currentPositionReadStatesIterator;
public boolean hasNext() {
return alignmentStartIterator.hasNext() ||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
}
public AlignmentStateMachine next() {
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
currentPositionReadStates = alignmentStartIterator.next();
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
}
return currentPositionReadStatesIterator.next();
}
public void remove() {
currentPositionReadStatesIterator.remove();
thisSampleReadStates--;
totalReadStates--;
if ( currentPositionReadStates.isEmpty() ) {
alignmentStartIterator.remove();
}
}
};
}
totalReadStates += readStates.addStatesAtNextAlignmentStart(newReadStates);
}
}

View File

@ -418,8 +418,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
for ( final boolean keepReads : Arrays.asList(true, false) ) {
for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
// for ( final int downsampleTo : Arrays.asList(1)) {
// for ( final int nReadsPerLocus : Arrays.asList(10) ) {
// for ( final int nLoci : Arrays.asList(25) ) {
// for ( final int nReadsPerLocus : Arrays.asList(1) ) {
// for ( final int nLoci : Arrays.asList(1) ) {
// for ( final int nSamples : Arrays.asList(1) ) {
// for ( final boolean keepReads : Arrays.asList(true) ) {
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
@ -436,7 +436,6 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
return tests.toArray(new Object[][]{});
}
//@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
final int nLoci,

View File

@ -38,11 +38,7 @@ import java.util.*;
/**
* testing of the new (non-legacy) version of LocusIteratorByState
*/
public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
///////////////////////////////////////
// Read State Manager Tests //
///////////////////////////////////////
public class PerSampleReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
private class PerSampleReadStateManagerTest extends TestDataProvider {
private List<Integer> readCountsPerAlignmentStart;
private List<SAMRecord> reads;
@ -63,10 +59,7 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
}
public void run() {
final List<String> samples = LocusIteratorByState.sampleListForSAMWithoutReadGroups();
final Iterator<GATKSAMRecord> iterator = new LinkedList<GATKSAMRecord>().iterator();
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false);
ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
PerSampleReadStateManager perSampleReadStateManager = new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
makeReads();