Refactor PerSampleReadStates into a separate class
-- The per-sample state managers no longer update the total read-state count themselves; instead each one returns a delta count, which the overall ReadStateManager applies to its total.
-- This is one step on the way to improving the underlying representation of the data in PerSampleReadStateManager.
-- Make LocusIteratorByState final.
This commit is contained in:
parent
5c2799554a
commit
5a5422e4f8
|
|
@ -65,7 +65,7 @@ import java.util.*;
|
|||
* occurs, if requested. This allows users of LIBS to see both a ReadBackedPileup view of the data as well as
|
||||
* a stream of unique, sorted reads
|
||||
*/
|
||||
public class LocusIteratorByState extends LocusIterator {
|
||||
public final class LocusIteratorByState extends LocusIterator {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
|
|
@ -233,9 +233,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
final GenomeLoc location = getLocation();
|
||||
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
|
||||
|
||||
for (final Map.Entry<String, ReadStateManager.PerSampleReadStateManager> sampleStatePair : readStates ) {
|
||||
for (final Map.Entry<String, PerSampleReadStateManager> sampleStatePair : readStates ) {
|
||||
final String sample = sampleStatePair.getKey();
|
||||
final ReadStateManager.PerSampleReadStateManager readState = sampleStatePair.getValue();
|
||||
final PerSampleReadStateManager readState = sampleStatePair.getValue();
|
||||
final Iterator<AlignmentStateMachine> iterator = readState.iterator();
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readState.size());
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* ReadStateManager for a single sample
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/13/13
|
||||
* Time: 12:28 PM
|
||||
*/
|
||||
final class PerSampleReadStateManager implements Iterable<AlignmentStateMachine> {
|
||||
private final static Logger logger = Logger.getLogger(ReadStateManager.class);
|
||||
private final static boolean CAPTURE_DOWNSAMPLING_STATS = true;
|
||||
|
||||
private List<LinkedList<AlignmentStateMachine>> readStatesByAlignmentStart = new LinkedList<LinkedList<AlignmentStateMachine>>();
|
||||
private final Downsampler<LinkedList<AlignmentStateMachine>> levelingDownsampler;
|
||||
private int thisSampleReadStates = 0;
|
||||
|
||||
private final int downsamplingTarget;
|
||||
private int nSitesNeedingDownsampling = 0;
|
||||
private int nSites = 0;
|
||||
|
||||
public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
this.downsamplingTarget = LIBSDownsamplingInfo.isPerformDownsampling() ? LIBSDownsamplingInfo.getToCoverage() : -1;
|
||||
this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
|
||||
? new LevelingDownsampler<LinkedList<AlignmentStateMachine>, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Assumes it can just keep the states linked lists without making a copy
|
||||
* @param states the new states to add to this manager
|
||||
* @return The change in the number of states, after including states and potentially downsampling
|
||||
*/
|
||||
@Requires("states != null")
|
||||
@Ensures("result >= 0")
|
||||
public int addStatesAtNextAlignmentStart(LinkedList<AlignmentStateMachine> states) {
|
||||
if ( states.isEmpty() ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(states);
|
||||
int nStatesAdded = states.size();
|
||||
|
||||
if ( isDownsampling() ) {
|
||||
captureDownsamplingStats();
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
nStatesAdded -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
|
||||
thisSampleReadStates += nStatesAdded;
|
||||
return nStatesAdded;
|
||||
}
|
||||
|
||||
private boolean isDownsampling() {
|
||||
return levelingDownsampler != null;
|
||||
}
|
||||
|
||||
private AlignmentStateMachine getFirst() {
|
||||
if (readStatesByAlignmentStart.isEmpty())
|
||||
return null;
|
||||
else
|
||||
return readStatesByAlignmentStart.get(0).getFirst();
|
||||
}
|
||||
|
||||
@Requires("isDownsampling()")
|
||||
private void captureDownsamplingStats() {
|
||||
if ( CAPTURE_DOWNSAMPLING_STATS ) {
|
||||
nSites++;
|
||||
final int loc = getFirst().getGenomePosition();
|
||||
String message = "Pass through";
|
||||
final boolean downsampling = thisSampleReadStates > downsamplingTarget;
|
||||
if ( downsampling ) {
|
||||
nSitesNeedingDownsampling++;
|
||||
message = "Downsampling";
|
||||
}
|
||||
|
||||
if ( downsampling || nSites % 10000 == 0 )
|
||||
logger.info(String.format("%20s at %s: coverage=%d, max=%d, fraction of downsampled sites=%.2e",
|
||||
message, loc, thisSampleReadStates, downsamplingTarget, (1.0 * nSitesNeedingDownsampling / nSites)));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is there at least one alignment for this sample in this manager?
|
||||
* @return true if there's at least one alignment, false otherwise
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
}
|
||||
|
||||
public AlignmentStateMachine peek() {
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of read states currently in this manager
|
||||
* @return the number of read states
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int size() {
|
||||
return thisSampleReadStates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances all read states forward by one element, removing states that are
|
||||
* no long aligned to the current position.
|
||||
* @return the number of states we're removed after advancing
|
||||
*/
|
||||
public int updateReadStates() {
|
||||
int nRemoved = 0;
|
||||
final Iterator<AlignmentStateMachine> it = iterator();
|
||||
while (it.hasNext()) {
|
||||
final AlignmentStateMachine state = it.next();
|
||||
final CigarOperator op = state.stepForwardOnGenome();
|
||||
if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
it.remove(); // we've stepped off the end of the object
|
||||
nRemoved++;
|
||||
}
|
||||
}
|
||||
|
||||
return nRemoved;
|
||||
}
|
||||
|
||||
// todo -- reimplement
|
||||
public Iterator<AlignmentStateMachine> iterator() {
|
||||
return new Iterator<AlignmentStateMachine>() {
|
||||
private final Iterator<LinkedList<AlignmentStateMachine>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<AlignmentStateMachine> currentPositionReadStates;
|
||||
private Iterator<AlignmentStateMachine> currentPositionReadStatesIterator;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
}
|
||||
|
||||
@Override
|
||||
public AlignmentStateMachine next() {
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -28,10 +28,7 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -50,9 +47,7 @@ import java.util.*;
|
|||
* Date: 1/5/13
|
||||
* Time: 2:02 PM
|
||||
*/
|
||||
final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateManager.PerSampleReadStateManager>> {
|
||||
private final static Logger logger = Logger.getLogger(ReadStateManager.class);
|
||||
private final static boolean CAPTURE_DOWNSAMPLING_STATS = true;
|
||||
final class ReadStateManager implements Iterable<Map.Entry<String, PerSampleReadStateManager>> {
|
||||
private final List<String> samples;
|
||||
private final PeekableIterator<GATKSAMRecord> iterator;
|
||||
private final SamplePartitioner<GATKSAMRecord> samplePartitioner;
|
||||
|
|
@ -97,7 +92,7 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
|||
* @return Iterator over sample + per sample read state manager pairs for this read state manager.
|
||||
*/
|
||||
@Override
|
||||
public Iterator<Map.Entry<String, ReadStateManager.PerSampleReadStateManager>> iterator() {
|
||||
public Iterator<Map.Entry<String, PerSampleReadStateManager>> iterator() {
|
||||
return readStatesBySample.entrySet().iterator();
|
||||
}
|
||||
|
||||
|
|
@ -142,7 +137,7 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
|||
*/
|
||||
public void updateReadStates() {
|
||||
for (final PerSampleReadStateManager perSampleReadStateManager : readStatesBySample.values() ) {
|
||||
perSampleReadStateManager.updateReadStates();
|
||||
totalReadStates -= perSampleReadStateManager.updateReadStates();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -290,131 +285,6 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
|||
newReadStates.add(state);
|
||||
}
|
||||
|
||||
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
|
||||
// TODO -- refactor into separate class with pointer to ReadStateManager for updates to the total counts
|
||||
protected final class PerSampleReadStateManager implements Iterable<AlignmentStateMachine> {
|
||||
private List<LinkedList<AlignmentStateMachine>> readStatesByAlignmentStart = new LinkedList<LinkedList<AlignmentStateMachine>>();
|
||||
private final Downsampler<LinkedList<AlignmentStateMachine>> levelingDownsampler;
|
||||
private int thisSampleReadStates = 0;
|
||||
|
||||
private final int downsamplingTarget;
|
||||
private int nSitesNeedingDownsampling = 0;
|
||||
private int nSites = 0;
|
||||
|
||||
public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
this.downsamplingTarget = LIBSDownsamplingInfo.isPerformDownsampling() ? LIBSDownsamplingInfo.getToCoverage() : -1;
|
||||
this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
|
||||
? new LevelingDownsampler<LinkedList<AlignmentStateMachine>, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Assumes it can just keep the states linked lists without making a copy
|
||||
* @param states
|
||||
*/
|
||||
public void addStatesAtNextAlignmentStart(LinkedList<AlignmentStateMachine> states) {
|
||||
if ( states.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(states);
|
||||
thisSampleReadStates += states.size();
|
||||
totalReadStates += states.size();
|
||||
|
||||
if ( isDownsampling() ) {
|
||||
captureDownsamplingStats();
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isDownsampling() {
|
||||
return levelingDownsampler != null;
|
||||
}
|
||||
|
||||
@Requires("isDownsampling()")
|
||||
private void captureDownsamplingStats() {
|
||||
if ( CAPTURE_DOWNSAMPLING_STATS ) {
|
||||
nSites++;
|
||||
final int loc = getFirst().getGenomePosition();
|
||||
String message = "Pass through";
|
||||
final boolean downsampling = thisSampleReadStates > downsamplingTarget;
|
||||
if ( downsampling ) {
|
||||
nSitesNeedingDownsampling++;
|
||||
message = "Downsampling";
|
||||
}
|
||||
|
||||
if ( downsampling || nSites % 10000 == 0 )
|
||||
logger.info(String.format("%20s at %s: coverage=%d, max=%d, fraction of downsampled sites=%.2e",
|
||||
message, loc, thisSampleReadStates, downsamplingTarget, (1.0 * nSitesNeedingDownsampling / nSites)));
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
}
|
||||
|
||||
public AlignmentStateMachine peek() {
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return thisSampleReadStates;
|
||||
}
|
||||
|
||||
public void updateReadStates() {
|
||||
final Iterator<AlignmentStateMachine> it = iterator();
|
||||
while (it.hasNext()) {
|
||||
final AlignmentStateMachine state = it.next();
|
||||
final CigarOperator op = state.stepForwardOnGenome();
|
||||
if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
it.remove(); // we've stepped off the end of the object
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Iterator<AlignmentStateMachine> iterator() {
|
||||
return new Iterator<AlignmentStateMachine>() {
|
||||
private final Iterator<LinkedList<AlignmentStateMachine>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<AlignmentStateMachine> currentPositionReadStates;
|
||||
private Iterator<AlignmentStateMachine> currentPositionReadStatesIterator;
|
||||
|
||||
public boolean hasNext() {
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
}
|
||||
|
||||
public AlignmentStateMachine next() {
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
totalReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
totalReadStates += readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -418,8 +418,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
||||
for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
|
||||
// for ( final int downsampleTo : Arrays.asList(1)) {
|
||||
// for ( final int nReadsPerLocus : Arrays.asList(10) ) {
|
||||
// for ( final int nLoci : Arrays.asList(25) ) {
|
||||
// for ( final int nReadsPerLocus : Arrays.asList(1) ) {
|
||||
// for ( final int nLoci : Arrays.asList(1) ) {
|
||||
// for ( final int nSamples : Arrays.asList(1) ) {
|
||||
// for ( final boolean keepReads : Arrays.asList(true) ) {
|
||||
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
|
||||
|
|
@ -436,7 +436,6 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
//@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
|
||||
public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
|
||||
final int nLoci,
|
||||
|
|
|
|||
|
|
@ -38,11 +38,7 @@ import java.util.*;
|
|||
/**
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
||||
///////////////////////////////////////
|
||||
// Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
public class PerSampleReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
||||
private class PerSampleReadStateManagerTest extends TestDataProvider {
|
||||
private List<Integer> readCountsPerAlignmentStart;
|
||||
private List<SAMRecord> reads;
|
||||
|
|
@ -63,10 +59,7 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
|||
}
|
||||
|
||||
public void run() {
|
||||
final List<String> samples = LocusIteratorByState.sampleListForSAMWithoutReadGroups();
|
||||
final Iterator<GATKSAMRecord> iterator = new LinkedList<GATKSAMRecord>().iterator();
|
||||
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false);
|
||||
ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
PerSampleReadStateManager perSampleReadStateManager = new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
|
||||
makeReads();
|
||||
|
||||
Loading…
Reference in New Issue