added a method to AlignmentContext called hasExceededMaxPileup, which you can use to determine if the current site exceeded the maximum pileup size (reads were dropped). Added this as a check to unified genotyper according to Eric's instructions, and added the plumbing to the engine.
Also deleted the FixBamSortOrder package that isn't used anymore. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2701 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4bcdab580c
commit
8453676b71
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -46,6 +47,7 @@ public class AlignmentContext {
|
|||
protected GenomeLoc loc = null;
|
||||
protected ReadBackedPileup basePileup = null;
|
||||
protected ReadBackedExtendedEventPileup extendedPileup = null;
|
||||
private LocusOverflowTracker tracker;
|
||||
|
||||
/**
|
||||
* The number of bases we've skipped over in the reference since the last map invocation.
|
||||
|
|
@ -202,4 +204,21 @@ public class AlignmentContext {
|
|||
public long getSkippedBases() {
|
||||
return skippedBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* a method for injecting the current locus overflow tracker into the alignment context.
|
||||
* @param state
|
||||
*/
|
||||
public void setLocusOverflowTracker(LocusOverflowTracker state) {
|
||||
this.tracker = state;
|
||||
}
|
||||
|
||||
/**
|
||||
* have we exceeded the maximum pileup at the current locus?
|
||||
* @return true if we have, false otherwise
|
||||
*/
|
||||
public boolean hasExceededMaxPileup() {
|
||||
if (this.tracker == null) return false;
|
||||
return this.tracker.inDroppedRegion(getLocation());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import java.util.ArrayList;
|
|||
|
||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
/**
|
||||
|
|
@ -75,6 +76,12 @@ public class AllLocusView extends LocusView {
|
|||
return createEmptyLocus( currentPosition );
|
||||
}
|
||||
|
||||
@Override
|
||||
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||
// we don't hold a overflow tracker
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a blank locus context at the specified location.
|
||||
* @param site Site at which to create the blank locus context.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.apache.log4j.Logger;
|
||||
/**
|
||||
|
|
@ -37,4 +38,10 @@ public class CoveredLocusView extends LocusView {
|
|||
public AlignmentContext next() {
|
||||
return nextLocus();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||
// we don't store a tracker
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -137,6 +137,9 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
else
|
||||
nextLocus = null;
|
||||
|
||||
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
||||
if ((this.loci != null))
|
||||
current.setLocusOverflowTracker(loci.getLocusOverflowTracker());
|
||||
return current;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MergingIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -129,7 +129,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
lastLoc = site;
|
||||
return new AlignmentContext(site, new ReadBackedPileup(site), skippedBases);
|
||||
}
|
||||
|
||||
|
||||
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||
// we don't have an overflow tracker
|
||||
return null;
|
||||
}
|
||||
|
||||
private RefMetaDataTracker createTracker( Collection<RODRecordList<ReferenceOrderedDatum>> allTracksHere ) {
|
||||
RefMetaDataTracker t = new RefMetaDataTracker();
|
||||
for ( RODRecordList<ReferenceOrderedDatum> track : allTracksHere ) {
|
||||
|
|
|
|||
|
|
@ -29,4 +29,12 @@ public abstract class LocusIterator implements Iterable<AlignmentContext>, Close
|
|||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
/**
|
||||
* a method for getting the overflow tracker, which is used to track sites at which the read count exceeds the
|
||||
* pile-up threshold set on the command line
|
||||
*
|
||||
* @return the overflow tracker, null if no tracker exists
|
||||
*/
|
||||
public abstract LocusOverflowTracker getLocusOverflowTracker();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -449,6 +449,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// printState();
|
||||
//}
|
||||
GenomeLoc location = null;
|
||||
|
||||
// the farthest right a read extends
|
||||
Integer rightMostEnd = -1;
|
||||
|
||||
int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation
|
||||
while (it.hasNext()) {
|
||||
SAMRecord read = it.next();
|
||||
|
|
@ -464,13 +468,17 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
curSize++;
|
||||
if (state.hadIndel()) hasExtendedEvents = true;
|
||||
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
|
||||
} else if (location == null) {
|
||||
location = GenomeLocParser.createGenomeLoc(read);
|
||||
} else {
|
||||
if (location == null)
|
||||
location = GenomeLocParser.createGenomeLoc(read);
|
||||
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
if (location != null) overflowTracker.exceeded(location, curSize);
|
||||
if (location != null)
|
||||
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
|
||||
curSize);
|
||||
}
|
||||
|
||||
// fast testing of position
|
||||
|
|
@ -523,89 +531,11 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
/**
|
||||
* a method for getting the overflow tracker, for testing
|
||||
* @return
|
||||
* a method for getting the overflow tracker
|
||||
* @return the overflow tracker, null if none exists
|
||||
*/
|
||||
protected LocusOverflowTracker getLocusOverflowTracker() {
|
||||
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||
return this.overflowTracker;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* a helper class that organizes the output of warning messages from read pile-ups that
|
||||
* are greater than the max pile-up size. We only want a single warning from each non-contigous
|
||||
* interval, up until the maximum warning limit.
|
||||
*/
|
||||
class LocusOverflowTracker {
|
||||
// the last location we found a warning at
|
||||
protected GenomeLoc lastLocation = null;
|
||||
|
||||
// the maximum warning count, and the number of warnings emitted
|
||||
protected static int warningsEmitted = 0;
|
||||
public static final int MAX_WARNINGS = 100;
|
||||
|
||||
// our maximum pileup size
|
||||
protected final int maxPileupSize;
|
||||
|
||||
// do we have a pending warning?
|
||||
protected boolean warningInQueue = false;
|
||||
|
||||
/**
|
||||
* create a LocusOverflowTracker
|
||||
*
|
||||
* @param maxPileup the maximum allowed pile-up size
|
||||
*/
|
||||
public LocusOverflowTracker(int maxPileup) {
|
||||
warningInQueue = false;
|
||||
maxPileupSize = maxPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* have we exceeded the maximum pile-up size?
|
||||
*
|
||||
* @param loc the current location
|
||||
* @param pileupSize the pile-up size
|
||||
*
|
||||
* @return return true if we're greater, false if we're not
|
||||
*/
|
||||
public boolean exceeded(GenomeLoc loc, int pileupSize) {
|
||||
boolean exceeded = pileupSize >= maxPileupSize;
|
||||
if (exceeded && warningsEmitted <= MAX_WARNINGS) {
|
||||
if (lastLocation == null) lastLocation = loc;
|
||||
else if (lastLocation.contiguousP(loc)) {
|
||||
lastLocation = lastLocation.merge(loc);
|
||||
}
|
||||
else {
|
||||
warnUser();
|
||||
lastLocation = loc;
|
||||
}
|
||||
warningInQueue = true;
|
||||
} else if (warningInQueue) {
|
||||
warnUser();
|
||||
lastLocation = null;
|
||||
}
|
||||
return exceeded;
|
||||
}
|
||||
|
||||
/**
|
||||
* clean up the warning queue, making sure we haven't stored a warning
|
||||
* that hasn't been emitted yet.
|
||||
*/
|
||||
public void cleanWarningQueue() {
|
||||
if (warningInQueue) warnUser();
|
||||
}
|
||||
|
||||
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
|
||||
protected void warnUser() {
|
||||
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
|
||||
warningInQueue = false;
|
||||
if (warningsEmitted < MAX_WARNINGS) {
|
||||
warningsEmitted++;
|
||||
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation);
|
||||
} else if (warningsEmitted == MAX_WARNINGS) {
|
||||
warningsEmitted++;
|
||||
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation +
|
||||
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* a helper class that organizes the output of warning messages from read pile-ups that
|
||||
* are greater than the max pile-up size. We only want a single warning from each non-contigous
|
||||
* interval, up until the maximum warning limit.
|
||||
*
|
||||
* cleanWarningQueue() must be called when you're finished with the LocusOverflowTracker to make
|
||||
* sure that no errors are left in the queue.
|
||||
*
|
||||
*/
|
||||
public class LocusOverflowTracker {
|
||||
// the last interval we emitted a warning for
|
||||
protected GenomeLoc lastLocation = null;
|
||||
|
||||
// the maximum warning count, and the number of warnings emitted
|
||||
protected static int warningsEmitted = 0;
|
||||
public static final int MAX_WARNINGS = 100;
|
||||
|
||||
// our maximum pileup size
|
||||
protected final int maxPileupSize;
|
||||
|
||||
// do we have a pending warning?
|
||||
protected boolean warningInQueue = false;
|
||||
|
||||
/**
|
||||
* create a LocusOverflowTracker
|
||||
*
|
||||
* @param maxPileup the maximum allowed pile-up size
|
||||
*/
|
||||
public LocusOverflowTracker(int maxPileup) {
|
||||
warningInQueue = false;
|
||||
maxPileupSize = maxPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* have we exceeded the maximum pile-up size?
|
||||
*
|
||||
* @param loc the current location
|
||||
* @param pileupSize the pile-up size
|
||||
*
|
||||
* @return return true if we're greater, false if we're not
|
||||
*/
|
||||
public boolean exceeded(GenomeLoc loc, int pileupSize) {
|
||||
boolean exceeded = pileupSize >= maxPileupSize;
|
||||
if (exceeded) {
|
||||
|
||||
// if the last location is null, we're starting a new region that exceeds max_reads_at_locus
|
||||
if (lastLocation == null) lastLocation = loc;
|
||||
// are we contiguous to the last genome loc?
|
||||
else if (lastLocation.contiguousP(loc)) {
|
||||
lastLocation = lastLocation.merge(loc);
|
||||
}
|
||||
// we have an existing region, and the current is not contiguous. Emit the old and store the new
|
||||
else {
|
||||
warnUser();
|
||||
lastLocation = loc;
|
||||
}
|
||||
|
||||
// regardless, we have a warning in the queue
|
||||
warningInQueue = true;
|
||||
}
|
||||
// we don't have a warning at this, but there is one in the queue
|
||||
else if (warningInQueue) {
|
||||
warnUser();
|
||||
lastLocation = null;
|
||||
}
|
||||
// return true if we exceeded the max size at this location
|
||||
return exceeded;
|
||||
}
|
||||
|
||||
/**
|
||||
* clean up the warning queue, making sure we haven't stored a warning
|
||||
* that hasn't been emitted yet.
|
||||
*/
|
||||
public void cleanWarningQueue() {
|
||||
if (warningInQueue) warnUser();
|
||||
}
|
||||
|
||||
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
|
||||
protected void warnUser() {
|
||||
|
||||
// make sure we have a warning in the queue
|
||||
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
|
||||
|
||||
// reset the warning light
|
||||
warningInQueue = false;
|
||||
|
||||
// check to see if we've meet our warning threshold or not. If we're equal to the threshold emit a message saying this
|
||||
// is the last warning they'll see
|
||||
if (warningsEmitted < MAX_WARNINGS) {
|
||||
warningsEmitted++;
|
||||
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation);
|
||||
} else if (warningsEmitted == MAX_WARNINGS) {
|
||||
warningsEmitted++;
|
||||
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation +
|
||||
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* is the specified location in the current exceeded pileup region
|
||||
* @param position the position
|
||||
* @return true if we're in that region
|
||||
*/
|
||||
public boolean inDroppedRegion(GenomeLoc position) {
|
||||
if (lastLocation == null || position == null) return false;
|
||||
return position.overlapsP(lastLocation) ? true : false;
|
||||
}
|
||||
}
|
||||
|
|
@ -260,11 +260,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
if ( !BaseUtils.isRegularBase(ref) )
|
||||
return null;
|
||||
|
||||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
||||
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
|
||||
if ( rawPileup.size() == getToolkit().getArguments().readMaxPileup )
|
||||
if ( rawContext.hasExceededMaxPileup() )
|
||||
return null;
|
||||
|
||||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
||||
|
||||
// filter the context based on min base and mapping qualities
|
||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<package name="FixBAMSortOrder">
|
||||
<executable name="FixBAMSortOrder">
|
||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
||||
<resource-bundle file="StingText.properties" />
|
||||
<dependencies>
|
||||
<!-- The walker -->
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.FixBAMSortOrderTag" />
|
||||
</dependencies>
|
||||
</executable>
|
||||
</package>
|
||||
Loading…
Reference in New Issue