From 5a5422e4f8220ecde133490eeef6b58fa3084397 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 13 Jan 2013 13:02:17 -0500 Subject: [PATCH] Refactor PerSampleReadStates into a separate class -- No longer update the total counts in each per-sample state manager, but instead return delta counts that are updated by the overall ReadStateManager -- One step on the way to improving the underlying representation of the data in PerSampleReadStateManager -- Make LocusIteratorByState final --- .../locusiterator/LocusIteratorByState.java | 6 +- .../PerSampleReadStateManager.java | 203 ++++++++++++++++++ .../utils/locusiterator/ReadStateManager.java | 138 +----------- .../LocusIteratorByStateUnitTest.java | 5 +- ...=> PerSampleReadStateManagerUnitTest.java} | 11 +- 5 files changed, 214 insertions(+), 149 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java rename public/java/test/org/broadinstitute/sting/utils/locusiterator/{ReadStateManagerUnitTest.java => PerSampleReadStateManagerUnitTest.java} (92%) diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java index 9499bfa35..e7b75f1f2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java @@ -65,7 +65,7 @@ import java.util.*; * occurs, if requested. 
This allows users of LIBS to see both a ReadBackedPileup view of the data as well as * a stream of unique, sorted reads */ -public class LocusIteratorByState extends LocusIterator { +public final class LocusIteratorByState extends LocusIterator { /** * our log, which we want to capture anything from this class */ @@ -233,9 +233,9 @@ public class LocusIteratorByState extends LocusIterator { final GenomeLoc location = getLocation(); final Map fullPileup = new HashMap(); - for (final Map.Entry sampleStatePair : readStates ) { + for (final Map.Entry sampleStatePair : readStates ) { final String sample = sampleStatePair.getKey(); - final ReadStateManager.PerSampleReadStateManager readState = sampleStatePair.getValue(); + final PerSampleReadStateManager readState = sampleStatePair.getValue(); final Iterator iterator = readState.iterator(); final List pile = new ArrayList(readState.size()); diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java new file mode 100644 index 000000000..c2a47bbdb --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.locusiterator; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.CigarOperator; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.downsampling.Downsampler; +import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +/** + * ReadStateManager for a single sample + * + * User: depristo + * Date: 1/13/13 + * Time: 12:28 PM + */ +final class PerSampleReadStateManager implements Iterable { + private final static Logger logger = Logger.getLogger(ReadStateManager.class); + private final static boolean CAPTURE_DOWNSAMPLING_STATS = true; + + private List> readStatesByAlignmentStart = new LinkedList>(); + private final Downsampler> levelingDownsampler; + private int thisSampleReadStates = 0; + + private final int downsamplingTarget; + private int nSitesNeedingDownsampling = 0; + private int nSites = 0; + + public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) { + this.downsamplingTarget = LIBSDownsamplingInfo.isPerformDownsampling() ? LIBSDownsamplingInfo.getToCoverage() : -1; + this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling() + ? 
new LevelingDownsampler, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage()) + : null; + } + + /** + * Assumes it can just keep the states linked lists without making a copy + * @param states the new states to add to this manager + * @return The change in the number of states, after including states and potentially downsampling + */ + @Requires("states != null") + @Ensures("result >= 0") + public int addStatesAtNextAlignmentStart(LinkedList states) { + if ( states.isEmpty() ) { + return 0; + } + + readStatesByAlignmentStart.add(states); + int nStatesAdded = states.size(); + + if ( isDownsampling() ) { + captureDownsamplingStats(); + levelingDownsampler.submit(readStatesByAlignmentStart); + levelingDownsampler.signalEndOfInput(); + + nStatesAdded -= levelingDownsampler.getNumberOfDiscardedItems(); + + // use returned List directly rather than make a copy, for efficiency's sake + readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems(); + levelingDownsampler.reset(); + } + + thisSampleReadStates += nStatesAdded; + return nStatesAdded; + } + + private boolean isDownsampling() { + return levelingDownsampler != null; + } + + private AlignmentStateMachine getFirst() { + if (readStatesByAlignmentStart.isEmpty()) + return null; + else + return readStatesByAlignmentStart.get(0).getFirst(); + } + + @Requires("isDownsampling()") + private void captureDownsamplingStats() { + if ( CAPTURE_DOWNSAMPLING_STATS ) { + nSites++; + final int loc = getFirst().getGenomePosition(); + String message = "Pass through"; + final boolean downsampling = thisSampleReadStates > downsamplingTarget; + if ( downsampling ) { + nSitesNeedingDownsampling++; + message = "Downsampling"; + } + + if ( downsampling || nSites % 10000 == 0 ) + logger.info(String.format("%20s at %s: coverage=%d, max=%d, fraction of downsampled sites=%.2e", + message, loc, thisSampleReadStates, downsamplingTarget, (1.0 * nSitesNeedingDownsampling / nSites))); + } + } + + /** + * Is there at least one 
alignment for this sample in this manager? + * @return true if there are no alignments for this sample (the manager is empty), false otherwise + */ + public boolean isEmpty() { + return readStatesByAlignmentStart.isEmpty(); + } + + public AlignmentStateMachine peek() { + return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek(); + } + + /** + * Get the number of read states currently in this manager + * @return the number of read states + */ + @Ensures("result >= 0") + public int size() { + return thisSampleReadStates; + } + + /** + * Advances all read states forward by one element, removing states that are + * no longer aligned to the current position. + * @return the number of states we've removed after advancing + */ + public int updateReadStates() { + int nRemoved = 0; + final Iterator it = iterator(); + while (it.hasNext()) { + final AlignmentStateMachine state = it.next(); + final CigarOperator op = state.stepForwardOnGenome(); + if (op == null) { + // we discard the read only when we are past its end AND indel at the end of the read (if any) was + // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe + // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. + it.remove(); // we've stepped off the end of the object + nRemoved++; + } + } + + return nRemoved; + } + + // todo -- reimplement + public Iterator iterator() { + return new Iterator() { + private final Iterator> alignmentStartIterator = readStatesByAlignmentStart.iterator(); + private LinkedList currentPositionReadStates; + private Iterator currentPositionReadStatesIterator; + + @Override + public boolean hasNext() { + return alignmentStartIterator.hasNext() || + (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext()); + } + + @Override + public AlignmentStateMachine next() { + if ( currentPositionReadStatesIterator == null || ! 
currentPositionReadStatesIterator.hasNext() ) { + currentPositionReadStates = alignmentStartIterator.next(); + currentPositionReadStatesIterator = currentPositionReadStates.iterator(); + } + + return currentPositionReadStatesIterator.next(); + } + + @Override + public void remove() { + currentPositionReadStatesIterator.remove(); + thisSampleReadStates--; + + if ( currentPositionReadStates.isEmpty() ) { + alignmentStartIterator.remove(); + } + } + }; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java index 3276291ef..4011875a6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java @@ -28,10 +28,7 @@ package org.broadinstitute.sting.utils.locusiterator; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.picard.util.PeekableIterator; -import net.sf.samtools.CigarOperator; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.downsampling.Downsampler; -import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; @@ -50,9 +47,7 @@ import java.util.*; * Date: 1/5/13 * Time: 2:02 PM */ -final class ReadStateManager implements Iterable> { - private final static Logger logger = Logger.getLogger(ReadStateManager.class); - private final static boolean CAPTURE_DOWNSAMPLING_STATS = true; +final class ReadStateManager implements Iterable> { private final List samples; private final PeekableIterator iterator; private final SamplePartitioner samplePartitioner; @@ -97,7 +92,7 @@ final class ReadStateManager implements Iterable> iterator() { + public Iterator> iterator() { return readStatesBySample.entrySet().iterator(); } @@ -142,7 +137,7 @@ final class ReadStateManager 
implements Iterable { - private List> readStatesByAlignmentStart = new LinkedList>(); - private final Downsampler> levelingDownsampler; - private int thisSampleReadStates = 0; - - private final int downsamplingTarget; - private int nSitesNeedingDownsampling = 0; - private int nSites = 0; - - public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) { - this.downsamplingTarget = LIBSDownsamplingInfo.isPerformDownsampling() ? LIBSDownsamplingInfo.getToCoverage() : -1; - this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling() - ? new LevelingDownsampler, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage()) - : null; - } - - /** - * Assumes it can just keep the states linked lists without making a copy - * @param states - */ - public void addStatesAtNextAlignmentStart(LinkedList states) { - if ( states.isEmpty() ) { - return; - } - - readStatesByAlignmentStart.add(states); - thisSampleReadStates += states.size(); - totalReadStates += states.size(); - - if ( isDownsampling() ) { - captureDownsamplingStats(); - levelingDownsampler.submit(readStatesByAlignmentStart); - levelingDownsampler.signalEndOfInput(); - - thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems(); - totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems(); - - // use returned List directly rather than make a copy, for efficiency's sake - readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems(); - levelingDownsampler.reset(); - } - } - - private boolean isDownsampling() { - return levelingDownsampler != null; - } - - @Requires("isDownsampling()") - private void captureDownsamplingStats() { - if ( CAPTURE_DOWNSAMPLING_STATS ) { - nSites++; - final int loc = getFirst().getGenomePosition(); - String message = "Pass through"; - final boolean downsampling = thisSampleReadStates > downsamplingTarget; - if ( downsampling ) { - nSitesNeedingDownsampling++; - message = "Downsampling"; - } - - if ( downsampling || nSites % 
10000 == 0 ) - logger.info(String.format("%20s at %s: coverage=%d, max=%d, fraction of downsampled sites=%.2e", - message, loc, thisSampleReadStates, downsamplingTarget, (1.0 * nSitesNeedingDownsampling / nSites))); - } - } - - public boolean isEmpty() { - return readStatesByAlignmentStart.isEmpty(); - } - - public AlignmentStateMachine peek() { - return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek(); - } - - public int size() { - return thisSampleReadStates; - } - - public void updateReadStates() { - final Iterator it = iterator(); - while (it.hasNext()) { - final AlignmentStateMachine state = it.next(); - final CigarOperator op = state.stepForwardOnGenome(); - if (op == null) { - // we discard the read only when we are past its end AND indel at the end of the read (if any) was - // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe - // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. - it.remove(); // we've stepped off the end of the object - } - } - } - - public Iterator iterator() { - return new Iterator() { - private final Iterator> alignmentStartIterator = readStatesByAlignmentStart.iterator(); - private LinkedList currentPositionReadStates; - private Iterator currentPositionReadStatesIterator; - - public boolean hasNext() { - return alignmentStartIterator.hasNext() || - (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext()); - } - - public AlignmentStateMachine next() { - if ( currentPositionReadStatesIterator == null || ! 
currentPositionReadStatesIterator.hasNext() ) { - currentPositionReadStates = alignmentStartIterator.next(); - currentPositionReadStatesIterator = currentPositionReadStates.iterator(); - } - - return currentPositionReadStatesIterator.next(); - } - - public void remove() { - currentPositionReadStatesIterator.remove(); - thisSampleReadStates--; - totalReadStates--; - - if ( currentPositionReadStates.isEmpty() ) { - alignmentStartIterator.remove(); - } - } - }; - } + totalReadStates += readStates.addStatesAtNextAlignmentStart(newReadStates); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index 727023b83..7ae2d97a1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -418,8 +418,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { for ( final boolean keepReads : Arrays.asList(true, false) ) { for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) { // for ( final int downsampleTo : Arrays.asList(1)) { -// for ( final int nReadsPerLocus : Arrays.asList(10) ) { -// for ( final int nLoci : Arrays.asList(25) ) { +// for ( final int nReadsPerLocus : Arrays.asList(1) ) { +// for ( final int nLoci : Arrays.asList(1) ) { // for ( final int nSamples : Arrays.asList(1) ) { // for ( final boolean keepReads : Arrays.asList(true) ) { // for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) { @@ -436,7 +436,6 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { return tests.toArray(new Object[][]{}); } - //@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests") @Test(enabled = true && ! 
DEBUG, dataProvider = "LIBS_ComplexPileupTests") public void testLIBS_ComplexPileupTests(final int nReadsPerLocus, final int nLoci, diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManagerUnitTest.java similarity index 92% rename from public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManagerUnitTest.java index 76b324d85..b9f2fb29a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManagerUnitTest.java @@ -38,11 +38,7 @@ import java.util.*; /** * testing of the new (non-legacy) version of LocusIteratorByState */ -public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest { - /////////////////////////////////////// - // Read State Manager Tests // - /////////////////////////////////////// - +public class PerSampleReadStateManagerUnitTest extends LocusIteratorByStateBaseTest { private class PerSampleReadStateManagerTest extends TestDataProvider { private List readCountsPerAlignmentStart; private List reads; @@ -63,10 +59,7 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest { } public void run() { - final List samples = LocusIteratorByState.sampleListForSAMWithoutReadGroups(); - final Iterator iterator = new LinkedList().iterator(); - ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false); - ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING); + PerSampleReadStateManager perSampleReadStateManager = new 
PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING); makeReads();