changes to intelligently log overflowing locus pile-ups.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2640 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4ac9eb7cb2
commit
a1b4cc4baf
|
|
@ -32,7 +32,7 @@ public class Reads {
|
||||||
private Integer downsampleToCoverage = null;
|
private Integer downsampleToCoverage = null;
|
||||||
private ValidationExclusion exclusionList = null;
|
private ValidationExclusion exclusionList = null;
|
||||||
private Collection<SamRecordFilter> supplementalFilters = null;
|
private Collection<SamRecordFilter> supplementalFilters = null;
|
||||||
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
protected int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
||||||
private boolean includeReadsWithDeletionAtLoci = false;
|
private boolean includeReadsWithDeletionAtLoci = false;
|
||||||
private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels)
|
private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels)
|
||||||
// immediately after the reference base such event is associated with?
|
// immediately after the reference base such event is associated with?
|
||||||
|
|
|
||||||
|
|
@ -37,14 +37,16 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
|
||||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
|
||||||
*/
|
|
||||||
public class LocusIteratorByState extends LocusIterator {
|
public class LocusIteratorByState extends LocusIterator {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* our log, which we want to capture anything from this class
|
* the overflow tracker, which makes sure we get a limited number of warnings for locus pile-ups that
|
||||||
|
* exceed the max depth
|
||||||
*/
|
*/
|
||||||
|
private LocusOverflowTracker overflowTracker;
|
||||||
|
|
||||||
|
/** our log, which we want to capture anything from this class */
|
||||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -257,6 +259,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, Reads readInformation) {
|
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, Reads readInformation) {
|
||||||
this.it = new PushbackIterator<SAMRecord>(samIterator);
|
this.it = new PushbackIterator<SAMRecord>(samIterator);
|
||||||
this.readInfo = readInformation;
|
this.readInfo = readInformation;
|
||||||
|
overflowTracker = new LocusOverflowTracker(readInformation.getMaxReadsAtLocus());
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<AlignmentContext> iterator() {
|
public Iterator<AlignmentContext> iterator() {
|
||||||
|
|
@ -270,6 +273,9 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
boolean r = ! readStates.isEmpty() || it.hasNext();
|
boolean r = ! readStates.isEmpty() || it.hasNext();
|
||||||
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
||||||
|
|
||||||
|
// if we don't have a next record, make sure we clean the warning queue
|
||||||
|
if (!r) overflowTracker.cleanWarningQueue();
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -437,39 +443,30 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
private void collectPendingReads(final int maximumPileupSize) {
|
private void collectPendingReads(int maxReads) {
|
||||||
//if (DEBUG) {
|
//if (DEBUG) {
|
||||||
// logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext()));
|
// logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext()));
|
||||||
// printState();
|
// printState();
|
||||||
//}
|
//}
|
||||||
|
|
||||||
Boolean warned = false; // warn them once per locus
|
|
||||||
int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation
|
int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation
|
||||||
while (it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
SAMRecord read = it.next();
|
SAMRecord read = it.next();
|
||||||
|
if (readIsPastCurrentPosition(read)) {
|
||||||
// if (DEBUG) logger.debug(String.format("Considering read %s: %s vs. %s",
|
// We've collected up all the reads that span this region
|
||||||
// read.getReadName(), getLocation(), GenomeLocParser.createGenomeLoc(read)));
|
|
||||||
|
|
||||||
if ( readIsPastCurrentPosition(read) ) {
|
|
||||||
// We've collected up enough reads
|
|
||||||
it.pushback(read);
|
it.pushback(read);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
if ( curSize >= maximumPileupSize ) {
|
if (curSize < maxReads) {
|
||||||
if (!warned) {
|
|
||||||
warned = true;
|
|
||||||
Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maximumPileupSize + " at location " + getLocation());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
||||||
state.stepForwardOnGenome();
|
state.stepForwardOnGenome();
|
||||||
readStates.add(state);
|
readStates.add(state);
|
||||||
curSize++;
|
curSize++;
|
||||||
if ( state.hadIndel() ) hasExtendedEvents = true;
|
if (state.hadIndel()) hasExtendedEvents = true;
|
||||||
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
|
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
overflowTracker.exceeded(read, curSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -513,4 +510,99 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
public void remove() {
|
public void remove() {
|
||||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a method for setting the overflow tracker, for dependency injection
|
||||||
|
* @param tracker
|
||||||
|
*/
|
||||||
|
protected void setLocusOverflowTracker(LocusOverflowTracker tracker) {
|
||||||
|
this.overflowTracker = tracker;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a method for getting the overflow tracker, for testing
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected LocusOverflowTracker getLocusOverflowTracker() {
|
||||||
|
return this.overflowTracker;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a helper class that organizes the output of warning messages from read pile-ups that
|
||||||
|
* are greater than the max pile-up size. We only want a single warning from each non-contigous
|
||||||
|
* interval, up until the maximum warning limit.
|
||||||
|
*/
|
||||||
|
class LocusOverflowTracker {
|
||||||
|
// the last location we found a warning at
|
||||||
|
protected GenomeLoc lastLocation = null;
|
||||||
|
|
||||||
|
// the maximum warning count, and the number of warnings emitted
|
||||||
|
protected static int warningsEmitted = 0;
|
||||||
|
public static final int MAX_WARNINGS = 100;
|
||||||
|
|
||||||
|
// our maximum pileup size
|
||||||
|
protected final int maxPileupSize;
|
||||||
|
|
||||||
|
// do we have a pending warning?
|
||||||
|
protected boolean warningInQueue = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a LocusOverflowTracker
|
||||||
|
*
|
||||||
|
* @param maxPileup the maximum allowed pile-up size
|
||||||
|
*/
|
||||||
|
public LocusOverflowTracker(int maxPileup) {
|
||||||
|
warningInQueue = false;
|
||||||
|
maxPileupSize = maxPileup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* have we exceeded the maximum pile-up size?
|
||||||
|
*
|
||||||
|
* @param read the current read, we use this to get the GenomeLoc only if needed (for efficiency)
|
||||||
|
* @param pileupSize the pile-up size
|
||||||
|
*
|
||||||
|
* @return return true if we're greater, false if we're not
|
||||||
|
*/
|
||||||
|
public boolean exceeded(SAMRecord read, int pileupSize) {
|
||||||
|
boolean exceeded = pileupSize >= maxPileupSize;
|
||||||
|
if (exceeded) {
|
||||||
|
warningInQueue = true;
|
||||||
|
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
|
||||||
|
if (lastLocation == null) lastLocation = loc;
|
||||||
|
else if (lastLocation.contiguousP(loc))
|
||||||
|
lastLocation = lastLocation.merge(loc);
|
||||||
|
else {
|
||||||
|
warnUser();
|
||||||
|
lastLocation = loc;
|
||||||
|
}
|
||||||
|
} else if (warningInQueue) {
|
||||||
|
lastLocation = null;
|
||||||
|
warnUser();
|
||||||
|
}
|
||||||
|
return exceeded;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* clean up the warning queue, making sure we haven't stored a warning
|
||||||
|
* that hasn't been emitted yet.
|
||||||
|
*/
|
||||||
|
public void cleanWarningQueue() {
|
||||||
|
if (warningInQueue) warnUser();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
|
||||||
|
protected void warnUser() {
|
||||||
|
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
|
||||||
|
warningInQueue = false;
|
||||||
|
if (warningsEmitted < MAX_WARNINGS) {
|
||||||
|
warningsEmitted++;
|
||||||
|
Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maxPileupSize + " at location " + lastLocation);
|
||||||
|
} else if (warningsEmitted == MAX_WARNINGS) {
|
||||||
|
warningsEmitted++;
|
||||||
|
Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maxPileupSize + " at location " + lastLocation +
|
||||||
|
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* testing of the LocusIteratorByState
|
||||||
|
*/
|
||||||
|
public class LocusIteratorByStateTest extends BaseTest {
|
||||||
|
|
||||||
|
private final int MAX_READS = 10;
|
||||||
|
private static SAMFileHeader header;
|
||||||
|
private LocusIteratorByState li;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() {
|
||||||
|
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||||
|
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBasicWarnings() {
|
||||||
|
// create a bunch of fake records
|
||||||
|
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||||
|
for (int x = 1; x < 50; x++)
|
||||||
|
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20));
|
||||||
|
|
||||||
|
// create a test version of the Reads object
|
||||||
|
TestReads reads = new TestReads(new ArrayList<File>());
|
||||||
|
reads.setMaxPileupSize(MAX_READS);
|
||||||
|
|
||||||
|
// create the iterator by state with the fake reads and fake records
|
||||||
|
li = new LocusIteratorByState(records.iterator(), reads);
|
||||||
|
|
||||||
|
// inject the testing version of the locus iterator watcher
|
||||||
|
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
|
||||||
|
((LocusIteratorOverride)li.getLocusOverflowTracker()).resetWarningCount();
|
||||||
|
while (li.hasNext()) {
|
||||||
|
AlignmentContext context = li.next();
|
||||||
|
//System.err.println(context.getLocation() + " " + context.getPileup().size());
|
||||||
|
}
|
||||||
|
Assert.assertEquals(1, ((LocusIteratorOverride) li.getLocusOverflowTracker()).getWarningCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTwoBigPiles() {
|
||||||
|
// create a bunch of fake records
|
||||||
|
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||||
|
for (int x = 1; x < 50; x++)
|
||||||
|
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 20));
|
||||||
|
for (int x = 1; x < 50; x++)
|
||||||
|
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20));
|
||||||
|
|
||||||
|
// create a test version of the Reads object
|
||||||
|
TestReads reads = new TestReads(new ArrayList<File>());
|
||||||
|
reads.setMaxPileupSize(MAX_READS);
|
||||||
|
|
||||||
|
// create the iterator by state with the fake reads and fake records
|
||||||
|
li = new LocusIteratorByState(records.iterator(), reads);
|
||||||
|
|
||||||
|
// inject the testing version of the locus iterator watcher
|
||||||
|
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
|
||||||
|
((LocusIteratorOverride)li.getLocusOverflowTracker()).resetWarningCount();
|
||||||
|
while (li.hasNext()) {
|
||||||
|
AlignmentContext context = li.next();
|
||||||
|
//System.err.println(context.getLocation() + " " + context.getPileup().size());
|
||||||
|
}
|
||||||
|
Assert.assertEquals(2, ((LocusIteratorOverride) li.getLocusOverflowTracker()).getWarningCount());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TestReads extends Reads {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple constructor for unit testing.
|
||||||
|
*
|
||||||
|
* @param readsFiles List of reads files to open.
|
||||||
|
*/
|
||||||
|
public TestReads(List<File> readsFiles) {
|
||||||
|
super(readsFiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxPileupSize(int maxSize) {
|
||||||
|
this.maximumReadsAtLocus = maxSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Class LocusOverflowTrackerTest
|
||||||
|
* <p/>
|
||||||
|
* test out the locus overflow tracker
|
||||||
|
*/
|
||||||
|
public class LocusOverflowTrackerTest extends BaseTest {
|
||||||
|
|
||||||
|
private LocusOverflowTracker tracker;
|
||||||
|
private final int MAX_READS = 10;
|
||||||
|
private static SAMFileHeader header;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() {
|
||||||
|
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||||
|
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void beforeTest() {
|
||||||
|
tracker = new LocusIteratorOverride(MAX_READS);
|
||||||
|
((LocusIteratorOverride) tracker).resetWarningCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLocusOverflow() {
|
||||||
|
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 100);
|
||||||
|
if (tracker.exceeded(rec, MAX_READS - 1))
|
||||||
|
Assert.fail("We shouldn't be exceeded when MAX_READS -1 is the input");
|
||||||
|
if (!tracker.exceeded(rec, MAX_READS)) Assert.fail("We should be exceeded when MAX_READS is the input");
|
||||||
|
if (!tracker.exceeded(rec, MAX_READS + 1))
|
||||||
|
Assert.fail("We shouldn't be exceeded when MAX_READS +1 is the input");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContinuousLocus() {
|
||||||
|
for (int x = 1; x < 5; x++) {
|
||||||
|
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 100);
|
||||||
|
tracker.exceeded(rec, MAX_READS + 1);
|
||||||
|
}
|
||||||
|
tracker.cleanWarningQueue();
|
||||||
|
Assert.assertEquals(1, ((LocusIteratorOverride) tracker).getWarningCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTwoSeperateContinuousLoci() {
|
||||||
|
for (int x = 1; x < 5; x++) {
|
||||||
|
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 2);
|
||||||
|
tracker.exceeded(rec, MAX_READS + 1);
|
||||||
|
}
|
||||||
|
for (int x = 10; x < 15; x++) {
|
||||||
|
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 2);
|
||||||
|
tracker.exceeded(rec, MAX_READS + 1);
|
||||||
|
}
|
||||||
|
tracker.cleanWarningQueue();
|
||||||
|
Assert.assertEquals(2, ((LocusIteratorOverride) tracker).getWarningCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// make sure we get only the specified number of warnings
|
||||||
|
public void testOverflow() {
|
||||||
|
for (int x = 1; x < (LocusOverflowTracker.warningsEmitted * 3); x += 2) {
|
||||||
|
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 100);
|
||||||
|
tracker.exceeded(rec, MAX_READS + 1);
|
||||||
|
}
|
||||||
|
tracker.cleanWarningQueue();
|
||||||
|
Assert.assertEquals(LocusOverflowTracker.warningsEmitted, ((LocusIteratorOverride) tracker).getWarningCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class LocusIteratorOverride extends LocusOverflowTracker {
|
||||||
|
|
||||||
|
public int warningsDropped = 0;
|
||||||
|
|
||||||
|
public LocusIteratorOverride(int maxPileup) {
|
||||||
|
super(maxPileup);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** override to count warnings instead of generating output. */
|
||||||
|
protected void warnUser() {
|
||||||
|
warningInQueue = false;
|
||||||
|
if (warningsEmitted <= MAX_WARNINGS)
|
||||||
|
warningsEmitted++;
|
||||||
|
else
|
||||||
|
warningsDropped++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getWarningCount() {
|
||||||
|
return warningsEmitted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void resetWarningCount() {
|
||||||
|
LocusOverflowTracker.warningsEmitted = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue