diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index baf643a85..9434944ca 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import java.util.*; @@ -46,6 +47,7 @@ public class AlignmentContext { protected GenomeLoc loc = null; protected ReadBackedPileup basePileup = null; protected ReadBackedExtendedEventPileup extendedPileup = null; + private LocusOverflowTracker tracker; /** * The number of bases we've skipped over in the reference since the last map invocation. @@ -202,4 +204,21 @@ public class AlignmentContext { public long getSkippedBases() { return skippedBases; } + + /** + * a method for injecting the current locus overflow tracker into the alignment context. + * @param state + */ + public void setLocusOverflowTracker(LocusOverflowTracker state) { + this.tracker = state; + } + + /** + * have we exceeded the maximum pileup at the current locus? + * @return true if we have, false otherwise + */ + public boolean hasExceededMaxPileup() { + if (this.tracker == null) return false; + return this.tracker.inDroppedRegion(getLocation()); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index 91189c61f..ab5b65119 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -5,6 +5,7 @@ import java.util.ArrayList; import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import org.broadinstitute.sting.utils.GenomeLoc; import net.sf.samtools.SAMRecord; /** @@ -75,6 +76,12 @@ public class AllLocusView extends LocusView { return createEmptyLocus( currentPosition ); } + @Override + public LocusOverflowTracker getLocusOverflowTracker() { + // we don't hold a overflow tracker + return null; + } + /** * Creates a blank locus context at the specified location. * @param site Site at which to create the blank locus context. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java index d89708bbc..8491e258e 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.apache.log4j.Logger; /** @@ -37,4 +38,10 @@ public class CoveredLocusView extends LocusView { public AlignmentContext next() { return nextLocus(); } + + @Override + public LocusOverflowTracker getLocusOverflowTracker() { + // we don't store a tracker + return null; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 576fb32eb..eac4787a2 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -137,6 +137,9 @@ public abstract class LocusView extends LocusIterator implements View { else nextLocus = null; + // if the current loci isn't null, get the overflow tracker and pass it to the alignment context + if ((this.loci != null)) + current.setLocusOverflowTracker(loci.getLocusOverflowTracker()); return current; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 0c18dd1f0..77e4b57d5 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -6,8 +6,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MergingIterator; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import java.util.*; @@ -129,7 +129,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { lastLoc = site; return new AlignmentContext(site, new ReadBackedPileup(site), skippedBases); } - + + public LocusOverflowTracker getLocusOverflowTracker() { + // we don't have an overflow tracker + return null; + } + private RefMetaDataTracker createTracker( Collection> allTracksHere ) { RefMetaDataTracker t = new RefMetaDataTracker(); for ( RODRecordList track : allTracksHere ) { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java index 30c1cf512..27a8be248 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java @@ -29,4 +29,12 @@ public abstract class LocusIterator implements Iterable, Close public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } + + /** + * a method for getting the overflow tracker, which is used to track sites at which the read count exceeds the + * pile-up threshold set on the command line + * + * @return the overflow tracker, null if no tracker exists + */ + public abstract LocusOverflowTracker getLocusOverflowTracker(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index c52c79ce2..f43616c71 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -449,6 +449,10 @@ public class LocusIteratorByState extends LocusIterator { // printState(); //} GenomeLoc location = null; + + // the farthest right a read extends + Integer rightMostEnd = -1; + int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation while (it.hasNext()) { SAMRecord read = it.next(); @@ -464,13 +468,17 @@ public class LocusIteratorByState extends LocusIterator { curSize++; if (state.hadIndel()) hasExtendedEvents = true; //if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName())); - } else if (location == null) { - location = GenomeLocParser.createGenomeLoc(read); + } else { + if (location == null) + location = GenomeLocParser.createGenomeLoc(read); + rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd; } - } + } - if (location != null) overflowTracker.exceeded(location, curSize); + if (location != null) + overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd), + curSize); } // fast testing of position @@ -523,89 +531,11 @@ public class LocusIteratorByState extends LocusIterator { } /** - * a method for getting the overflow tracker, for testing - * @return + * a method for getting the overflow tracker + * @return the overflow tracker, null if none exists */ - protected LocusOverflowTracker getLocusOverflowTracker() { + public LocusOverflowTracker getLocusOverflowTracker() { return this.overflowTracker; } } -/** - * a helper class that organizes the output of warning messages from read pile-ups that - * are greater than the max pile-up size. We only want a single warning from each non-contigous - * interval, up until the maximum warning limit. - */ -class LocusOverflowTracker { - // the last location we found a warning at - protected GenomeLoc lastLocation = null; - - // the maximum warning count, and the number of warnings emitted - protected static int warningsEmitted = 0; - public static final int MAX_WARNINGS = 100; - - // our maximum pileup size - protected final int maxPileupSize; - - // do we have a pending warning? - protected boolean warningInQueue = false; - - /** - * create a LocusOverflowTracker - * - * @param maxPileup the maximum allowed pile-up size - */ - public LocusOverflowTracker(int maxPileup) { - warningInQueue = false; - maxPileupSize = maxPileup; - } - - /** - * have we exceeded the maximum pile-up size? - * - * @param loc the current location - * @param pileupSize the pile-up size - * - * @return return true if we're greater, false if we're not - */ - public boolean exceeded(GenomeLoc loc, int pileupSize) { - boolean exceeded = pileupSize >= maxPileupSize; - if (exceeded && warningsEmitted <= MAX_WARNINGS) { - if (lastLocation == null) lastLocation = loc; - else if (lastLocation.contiguousP(loc)) { - lastLocation = lastLocation.merge(loc); - } - else { - warnUser(); - lastLocation = loc; - } - warningInQueue = true; - } else if (warningInQueue) { - warnUser(); - lastLocation = null; - } - return exceeded; - } - - /** - * clean up the warning queue, making sure we haven't stored a warning - * that hasn't been emitted yet. - */ - public void cleanWarningQueue() { - if (warningInQueue) warnUser(); - } - - /** warn the user, checking to make sure we haven't exceeded the maximum warning level. */ - protected void warnUser() { - if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()"); - warningInQueue = false; - if (warningsEmitted < MAX_WARNINGS) { - warningsEmitted++; - Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation); - } else if (warningsEmitted == MAX_WARNINGS) { - warningsEmitted++; - Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation + - "; the maximum warning count has been reached, we will no longer emit warnings of this nature!!"); - } - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusOverflowTracker.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusOverflowTracker.java new file mode 100644 index 000000000..d41e66751 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusOverflowTracker.java @@ -0,0 +1,113 @@ +package org.broadinstitute.sting.gatk.iterators; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; + +/** + * a helper class that organizes the output of warning messages from read pile-ups that + * are greater than the max pile-up size. We only want a single warning from each non-contigous + * interval, up until the maximum warning limit. + * + * cleanWarningQueue() must be called when you're finished with the LocusOverflowTracker to make + * sure that no errors are left in the queue. + * + */ +public class LocusOverflowTracker { + // the last interval we emitted a warning for + protected GenomeLoc lastLocation = null; + + // the maximum warning count, and the number of warnings emitted + protected static int warningsEmitted = 0; + public static final int MAX_WARNINGS = 100; + + // our maximum pileup size + protected final int maxPileupSize; + + // do we have a pending warning? + protected boolean warningInQueue = false; + + /** + * create a LocusOverflowTracker + * + * @param maxPileup the maximum allowed pile-up size + */ + public LocusOverflowTracker(int maxPileup) { + warningInQueue = false; + maxPileupSize = maxPileup; + } + + /** + * have we exceeded the maximum pile-up size? + * + * @param loc the current location + * @param pileupSize the pile-up size + * + * @return return true if we're greater, false if we're not + */ + public boolean exceeded(GenomeLoc loc, int pileupSize) { + boolean exceeded = pileupSize >= maxPileupSize; + if (exceeded) { + + // if the last location is null, we're starting a new region that exceeds max_reads_at_locus + if (lastLocation == null) lastLocation = loc; + // are we contiguous to the last genome loc? + else if (lastLocation.contiguousP(loc)) { + lastLocation = lastLocation.merge(loc); + } + // we have an existing region, and the current is not contiguous. Emit the old and store the new + else { + warnUser(); + lastLocation = loc; + } + + // regardless, we have a warning in the queue + warningInQueue = true; + } + // we don't have a warning at this, but there is one in the queue + else if (warningInQueue) { + warnUser(); + lastLocation = null; + } + // return true if we exceeded the max size at this location + return exceeded; + } + + /** + * clean up the warning queue, making sure we haven't stored a warning + * that hasn't been emitted yet. + */ + public void cleanWarningQueue() { + if (warningInQueue) warnUser(); + } + + /** warn the user, checking to make sure we haven't exceeded the maximum warning level. */ + protected void warnUser() { + + // make sure we have a warning in the queue + if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()"); + + // reset the warning light + warningInQueue = false; + + // check to see if we've meet our warning threshold or not. If we're equal to the threshold emit a message saying this + // is the last warning they'll see + if (warningsEmitted < MAX_WARNINGS) { + warningsEmitted++; + Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation); + } else if (warningsEmitted == MAX_WARNINGS) { + warningsEmitted++; + Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation + + "; the maximum warning count has been reached, we will no longer emit warnings of this nature!!"); + } + } + + /** + * is the specified location in the current exceeded pileup region + * @param position the position + * @return true if we're in that region + */ + public boolean inDroppedRegion(GenomeLoc position) { + if (lastLocation == null || position == null) return false; + return position.overlapsP(lastLocation) ? true : false; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index c819e3f6a..38f4bc2af 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -260,11 +260,12 @@ public class UnifiedGenotyper extends LocusWalker - - - - - - - - - -