Refactor updateReadStates into PerSampleReadStateManager, add tracking of downsampling rate
This commit is contained in:
parent
a4334a67e0
commit
5c2799554a
|
|
@ -63,6 +63,8 @@ public class LIBSPerformance extends CommandLineProgram {
|
||||||
@Argument(fullName = "L", shortName = "L", doc = "Query location", required = false)
|
@Argument(fullName = "L", shortName = "L", doc = "Query location", required = false)
|
||||||
public String location = null;
|
public String location = null;
|
||||||
|
|
||||||
|
@Argument(fullName = "dt", shortName = "dt", doc = "Enable downsampling", required = false)
|
||||||
|
public boolean downsample = false;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int execute() throws IOException {
|
public int execute() throws IOException {
|
||||||
|
|
@ -86,7 +88,7 @@ public class LIBSPerformance extends CommandLineProgram {
|
||||||
for ( final SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups() )
|
for ( final SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups() )
|
||||||
samples.add(rg.getSample());
|
samples.add(rg.getSample());
|
||||||
|
|
||||||
final LIBSDownsamplingInfo ds = new LIBSDownsamplingInfo(false, -1);
|
final LIBSDownsamplingInfo ds = new LIBSDownsamplingInfo(downsample, 250);
|
||||||
|
|
||||||
final LocusIteratorByState libs =
|
final LocusIteratorByState libs =
|
||||||
new LocusIteratorByState(
|
new LocusIteratorByState(
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.picard.util.PeekableIterator;
|
import net.sf.picard.util.PeekableIterator;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -50,6 +51,8 @@ import java.util.*;
|
||||||
* Time: 2:02 PM
|
* Time: 2:02 PM
|
||||||
*/
|
*/
|
||||||
final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateManager.PerSampleReadStateManager>> {
|
final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateManager.PerSampleReadStateManager>> {
|
||||||
|
private final static Logger logger = Logger.getLogger(ReadStateManager.class);
|
||||||
|
private final static boolean CAPTURE_DOWNSAMPLING_STATS = true;
|
||||||
private final List<String> samples;
|
private final List<String> samples;
|
||||||
private final PeekableIterator<GATKSAMRecord> iterator;
|
private final PeekableIterator<GATKSAMRecord> iterator;
|
||||||
private final SamplePartitioner<GATKSAMRecord> samplePartitioner;
|
private final SamplePartitioner<GATKSAMRecord> samplePartitioner;
|
||||||
|
|
@ -138,18 +141,8 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
||||||
* of the next pileup.
|
* of the next pileup.
|
||||||
*/
|
*/
|
||||||
public void updateReadStates() {
|
public void updateReadStates() {
|
||||||
for (final PerSampleReadStateManager readStateManager : readStatesBySample.values() ) {
|
for (final PerSampleReadStateManager perSampleReadStateManager : readStatesBySample.values() ) {
|
||||||
final Iterator<AlignmentStateMachine> it = readStateManager.iterator();
|
perSampleReadStateManager.updateReadStates();
|
||||||
while (it.hasNext()) {
|
|
||||||
final AlignmentStateMachine state = it.next();
|
|
||||||
final CigarOperator op = state.stepForwardOnGenome();
|
|
||||||
if (op == null) {
|
|
||||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
|
||||||
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
|
||||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
|
||||||
it.remove(); // we've stepped off the end of the object
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -301,13 +294,17 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO -- refactor into separate class with pointer to ReadStateManager for updates to the total counts
|
// TODO -- refactor into separate class with pointer to ReadStateManager for updates to the total counts
|
||||||
protected class PerSampleReadStateManager implements Iterable<AlignmentStateMachine> {
|
protected final class PerSampleReadStateManager implements Iterable<AlignmentStateMachine> {
|
||||||
private List<LinkedList<AlignmentStateMachine>> readStatesByAlignmentStart = new LinkedList<LinkedList<AlignmentStateMachine>>();
|
private List<LinkedList<AlignmentStateMachine>> readStatesByAlignmentStart = new LinkedList<LinkedList<AlignmentStateMachine>>();
|
||||||
private final Downsampler<LinkedList<AlignmentStateMachine>> levelingDownsampler;
|
private final Downsampler<LinkedList<AlignmentStateMachine>> levelingDownsampler;
|
||||||
|
|
||||||
private int thisSampleReadStates = 0;
|
private int thisSampleReadStates = 0;
|
||||||
|
|
||||||
|
private final int downsamplingTarget;
|
||||||
|
private int nSitesNeedingDownsampling = 0;
|
||||||
|
private int nSites = 0;
|
||||||
|
|
||||||
public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||||
|
this.downsamplingTarget = LIBSDownsamplingInfo.isPerformDownsampling() ? LIBSDownsamplingInfo.getToCoverage() : -1;
|
||||||
this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
|
this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
|
||||||
? new LevelingDownsampler<LinkedList<AlignmentStateMachine>, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage())
|
? new LevelingDownsampler<LinkedList<AlignmentStateMachine>, AlignmentStateMachine>(LIBSDownsamplingInfo.getToCoverage())
|
||||||
: null;
|
: null;
|
||||||
|
|
@ -326,7 +323,8 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
||||||
thisSampleReadStates += states.size();
|
thisSampleReadStates += states.size();
|
||||||
totalReadStates += states.size();
|
totalReadStates += states.size();
|
||||||
|
|
||||||
if ( levelingDownsampler != null ) {
|
if ( isDownsampling() ) {
|
||||||
|
captureDownsamplingStats();
|
||||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||||
levelingDownsampler.signalEndOfInput();
|
levelingDownsampler.signalEndOfInput();
|
||||||
|
|
||||||
|
|
@ -339,6 +337,28 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether this per-sample manager has a downsampler configured.
 *
 * @return true when {@code levelingDownsampler} is non-null
 */
private boolean isDownsampling() {
|
||||||
|
return levelingDownsampler != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Updates the per-sample downsampling counters and logs a status line.
 *
 * Does nothing when {@code CAPTURE_DOWNSAMPLING_STATS} is false. Otherwise each call is
 * counted as one site in {@code nSites}; when {@code thisSampleReadStates} exceeds
 * {@code downsamplingTarget} the call is also counted in {@code nSitesNeedingDownsampling}.
 * An INFO message is logged for every call that exceeds the target, and additionally
 * whenever {@code nSites} is a multiple of 10000.
 *
 * The contract annotation requires {@code isDownsampling()} to hold on entry.
 */
@Requires("isDownsampling()")
|
||||||
|
private void captureDownsamplingStats() {
|
||||||
|
if ( CAPTURE_DOWNSAMPLING_STATS ) {
|
||||||
|
nSites++;
|
||||||
|
// NOTE(review): getFirst() is defined outside this hunk; presumably it returns the
|
// first tracked read state and so needs at least one state present -- confirm.
final int loc = getFirst().getGenomePosition();
|
||||||
|
String message = "Pass through";
|
||||||
|
// a site "needs downsampling" when the current number of read states is strictly above the target
final boolean downsampling = thisSampleReadStates > downsamplingTarget;
|
||||||
|
if ( downsampling ) {
|
||||||
|
nSitesNeedingDownsampling++;
|
||||||
|
message = "Downsampling";
|
||||||
|
}
|
||||||
|
|
||||||
|
// log every over-target call, plus a periodic line every 10000 counted sites;
// the last format argument is the running fraction nSitesNeedingDownsampling / nSites
if ( downsampling || nSites % 10000 == 0 )
|
||||||
|
logger.info(String.format("%20s at %s: coverage=%d, max=%d, fraction of downsampled sites=%.2e",
|
||||||
|
message, loc, thisSampleReadStates, downsamplingTarget, (1.0 * nSitesNeedingDownsampling / nSites)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return readStatesByAlignmentStart.isEmpty();
|
return readStatesByAlignmentStart.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
@ -351,11 +371,25 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
||||||
return thisSampleReadStates;
|
return thisSampleReadStates;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Advances every read state held for this sample by one step on the genome.
 *
 * Walks this manager's own {@code iterator()}, calls {@code stepForwardOnGenome()} on each
 * {@code AlignmentStateMachine}, and removes (via the iterator) each state for which that
 * call returns null.
 */
public void updateReadStates() {
|
||||||
|
final Iterator<AlignmentStateMachine> it = iterator();
|
||||||
|
while (it.hasNext()) {
|
||||||
|
final AlignmentStateMachine state = it.next();
|
||||||
|
final CigarOperator op = state.stepForwardOnGenome();
|
||||||
|
// a null operator is treated here as "this read has nothing left to step through"
if (op == null) {
|
||||||
|
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||||
|
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
||||||
|
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||||
|
it.remove(); // we've stepped off the end of the object
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Iterator<AlignmentStateMachine> iterator() {
|
public Iterator<AlignmentStateMachine> iterator() {
|
||||||
return new Iterator<AlignmentStateMachine>() {
|
return new Iterator<AlignmentStateMachine>() {
|
||||||
private Iterator<LinkedList<AlignmentStateMachine>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
private final Iterator<LinkedList<AlignmentStateMachine>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||||
private LinkedList<AlignmentStateMachine> currentPositionReadStates = null;
|
private LinkedList<AlignmentStateMachine> currentPositionReadStates;
|
||||||
private Iterator<AlignmentStateMachine> currentPositionReadStatesIterator = null;
|
private Iterator<AlignmentStateMachine> currentPositionReadStatesIterator;
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return alignmentStartIterator.hasNext() ||
|
return alignmentStartIterator.hasNext() ||
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue