added a method to AlignmentContext called hasExceededMaxPileup, which you can use to determine if the current site exceeded the maximum pileup size (reads were dropped). Added this as a check to unified genotyper according to Eric's instructions, and added the plumbing to the engine.

Also deleted the FixBamSortOrder package that isn't used anymore.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2701 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-01-27 05:17:01 +00:00
parent 4bcdab580c
commit 8453676b71
10 changed files with 182 additions and 100 deletions

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*;
@ -46,6 +47,7 @@ public class AlignmentContext {
protected GenomeLoc loc = null;
protected ReadBackedPileup basePileup = null;
protected ReadBackedExtendedEventPileup extendedPileup = null;
private LocusOverflowTracker tracker;
/**
* The number of bases we've skipped over in the reference since the last map invocation.
@ -202,4 +204,21 @@ public class AlignmentContext {
public long getSkippedBases() {
return skippedBases;
}
/**
* a method for injecting the current locus overflow tracker into the alignment context.
* @param state
*/
public void setLocusOverflowTracker(LocusOverflowTracker state) {
this.tracker = state;
}
/**
* have we exceeded the maximum pileup at the current locus?
* @return true if we have, false otherwise
*/
public boolean hasExceededMaxPileup() {
if (this.tracker == null) return false;
return this.tracker.inDroppedRegion(getLocation());
}
}

View File

@ -5,6 +5,7 @@ import java.util.ArrayList;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMRecord;
/**
@ -75,6 +76,12 @@ public class AllLocusView extends LocusView {
return createEmptyLocus( currentPosition );
}
@Override
public LocusOverflowTracker getLocusOverflowTracker() {
// we don't hold a overflow tracker
return null;
}
/**
* Creates a blank locus context at the specified location.
* @param site Site at which to create the blank locus context.

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.apache.log4j.Logger;
/**
@ -37,4 +38,10 @@ public class CoveredLocusView extends LocusView {
public AlignmentContext next() {
return nextLocus();
}
@Override
public LocusOverflowTracker getLocusOverflowTracker() {
// we don't store a tracker
return null;
}
}

View File

@ -137,6 +137,9 @@ public abstract class LocusView extends LocusIterator implements View {
else
nextLocus = null;
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
if ((this.loci != null))
current.setLocusOverflowTracker(loci.getLocusOverflowTracker());
return current;
}

View File

@ -6,8 +6,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MergingIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*;
@ -129,7 +129,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
lastLoc = site;
return new AlignmentContext(site, new ReadBackedPileup(site), skippedBases);
}
public LocusOverflowTracker getLocusOverflowTracker() {
// we don't have an overflow tracker
return null;
}
private RefMetaDataTracker createTracker( Collection<RODRecordList<ReferenceOrderedDatum>> allTracksHere ) {
RefMetaDataTracker t = new RefMetaDataTracker();
for ( RODRecordList<ReferenceOrderedDatum> track : allTracksHere ) {

View File

@ -29,4 +29,12 @@ public abstract class LocusIterator implements Iterable<AlignmentContext>, Close
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
/**
* a method for getting the overflow tracker, which is used to track sites at which the read count exceeds the
* pile-up threshold set on the command line
*
* @return the overflow tracker, null if no tracker exists
*/
public abstract LocusOverflowTracker getLocusOverflowTracker();
}

View File

@ -449,6 +449,10 @@ public class LocusIteratorByState extends LocusIterator {
// printState();
//}
GenomeLoc location = null;
// the farthest right a read extends
Integer rightMostEnd = -1;
int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation
while (it.hasNext()) {
SAMRecord read = it.next();
@ -464,13 +468,17 @@ public class LocusIteratorByState extends LocusIterator {
curSize++;
if (state.hadIndel()) hasExtendedEvents = true;
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
} else if (location == null) {
location = GenomeLocParser.createGenomeLoc(read);
} else {
if (location == null)
location = GenomeLocParser.createGenomeLoc(read);
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
}
}
}
if (location != null) overflowTracker.exceeded(location, curSize);
if (location != null)
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
curSize);
}
// fast testing of position
@ -523,89 +531,11 @@ public class LocusIteratorByState extends LocusIterator {
}
/**
* a method for getting the overflow tracker, for testing
* @return
* a method for getting the overflow tracker
* @return the overflow tracker, null if none exists
*/
protected LocusOverflowTracker getLocusOverflowTracker() {
public LocusOverflowTracker getLocusOverflowTracker() {
return this.overflowTracker;
}
}
/**
* a helper class that organizes the output of warning messages from read pile-ups that
* are greater than the max pile-up size. We only want a single warning from each non-contigous
* interval, up until the maximum warning limit.
*/
class LocusOverflowTracker {
// the last location we found a warning at
protected GenomeLoc lastLocation = null;
// the maximum warning count, and the number of warnings emitted
protected static int warningsEmitted = 0;
public static final int MAX_WARNINGS = 100;
// our maximum pileup size
protected final int maxPileupSize;
// do we have a pending warning?
protected boolean warningInQueue = false;
/**
* create a LocusOverflowTracker
*
* @param maxPileup the maximum allowed pile-up size
*/
public LocusOverflowTracker(int maxPileup) {
warningInQueue = false;
maxPileupSize = maxPileup;
}
/**
* have we exceeded the maximum pile-up size?
*
* @param loc the current location
* @param pileupSize the pile-up size
*
* @return return true if we're greater, false if we're not
*/
public boolean exceeded(GenomeLoc loc, int pileupSize) {
boolean exceeded = pileupSize >= maxPileupSize;
if (exceeded && warningsEmitted <= MAX_WARNINGS) {
if (lastLocation == null) lastLocation = loc;
else if (lastLocation.contiguousP(loc)) {
lastLocation = lastLocation.merge(loc);
}
else {
warnUser();
lastLocation = loc;
}
warningInQueue = true;
} else if (warningInQueue) {
warnUser();
lastLocation = null;
}
return exceeded;
}
/**
* clean up the warning queue, making sure we haven't stored a warning
* that hasn't been emitted yet.
*/
public void cleanWarningQueue() {
if (warningInQueue) warnUser();
}
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
protected void warnUser() {
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
warningInQueue = false;
if (warningsEmitted < MAX_WARNINGS) {
warningsEmitted++;
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation);
} else if (warningsEmitted == MAX_WARNINGS) {
warningsEmitted++;
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation +
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
}
}
}

View File

@ -0,0 +1,113 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
/**
* a helper class that organizes the output of warning messages from read pile-ups that
* are greater than the max pile-up size. We only want a single warning from each non-contigous
* interval, up until the maximum warning limit.
*
* cleanWarningQueue() must be called when you're finished with the LocusOverflowTracker to make
* sure that no errors are left in the queue.
*
*/
public class LocusOverflowTracker {
// the last interval we emitted a warning for
protected GenomeLoc lastLocation = null;
// the maximum warning count, and the number of warnings emitted
protected static int warningsEmitted = 0;
public static final int MAX_WARNINGS = 100;
// our maximum pileup size
protected final int maxPileupSize;
// do we have a pending warning?
protected boolean warningInQueue = false;
/**
* create a LocusOverflowTracker
*
* @param maxPileup the maximum allowed pile-up size
*/
public LocusOverflowTracker(int maxPileup) {
warningInQueue = false;
maxPileupSize = maxPileup;
}
/**
* have we exceeded the maximum pile-up size?
*
* @param loc the current location
* @param pileupSize the pile-up size
*
* @return return true if we're greater, false if we're not
*/
public boolean exceeded(GenomeLoc loc, int pileupSize) {
boolean exceeded = pileupSize >= maxPileupSize;
if (exceeded) {
// if the last location is null, we're starting a new region that exceeds max_reads_at_locus
if (lastLocation == null) lastLocation = loc;
// are we contiguous to the last genome loc?
else if (lastLocation.contiguousP(loc)) {
lastLocation = lastLocation.merge(loc);
}
// we have an existing region, and the current is not contiguous. Emit the old and store the new
else {
warnUser();
lastLocation = loc;
}
// regardless, we have a warning in the queue
warningInQueue = true;
}
// we don't have a warning at this, but there is one in the queue
else if (warningInQueue) {
warnUser();
lastLocation = null;
}
// return true if we exceeded the max size at this location
return exceeded;
}
/**
* clean up the warning queue, making sure we haven't stored a warning
* that hasn't been emitted yet.
*/
public void cleanWarningQueue() {
if (warningInQueue) warnUser();
}
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
protected void warnUser() {
// make sure we have a warning in the queue
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
// reset the warning light
warningInQueue = false;
// check to see if we've meet our warning threshold or not. If we're equal to the threshold emit a message saying this
// is the last warning they'll see
if (warningsEmitted < MAX_WARNINGS) {
warningsEmitted++;
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation);
} else if (warningsEmitted == MAX_WARNINGS) {
warningsEmitted++;
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation +
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
}
}
/**
* is the specified location in the current exceeded pileup region
* @param position the position
* @return true if we're in that region
*/
public boolean inDroppedRegion(GenomeLoc position) {
if (lastLocation == null || position == null) return false;
return position.overlapsP(lastLocation) ? true : false;
}
}

View File

@ -260,11 +260,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( !BaseUtils.isRegularBase(ref) )
return null;
ReadBackedPileup rawPileup = rawContext.getBasePileup();
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
if ( rawPileup.size() == getToolkit().getArguments().readMaxPileup )
if ( rawContext.hasExceededMaxPileup() )
return null;
ReadBackedPileup rawPileup = rawContext.getBasePileup();
// filter the context based on min base and mapping qualities
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);

View File

@ -1,11 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="FixBAMSortOrder">
<executable name="FixBAMSortOrder">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- The walker -->
<class name="org.broadinstitute.sting.gatk.walkers.FixBAMSortOrderTag" />
</dependencies>
</executable>
</package>