Added a method to AlignmentContext called hasExceededMaxPileup, which you can use to determine whether the current site exceeded the maximum pileup size (i.e. reads were dropped). Added this as a check to the UnifiedGenotyper according to Eric's instructions, and added the plumbing to the engine.
Also deleted the FixBAMSortOrder package, which isn't used anymore. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2701 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4bcdab580c
commit
8453676b71
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -46,6 +47,7 @@ public class AlignmentContext {
|
||||||
protected GenomeLoc loc = null;
|
protected GenomeLoc loc = null;
|
||||||
protected ReadBackedPileup basePileup = null;
|
protected ReadBackedPileup basePileup = null;
|
||||||
protected ReadBackedExtendedEventPileup extendedPileup = null;
|
protected ReadBackedExtendedEventPileup extendedPileup = null;
|
||||||
|
private LocusOverflowTracker tracker;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The number of bases we've skipped over in the reference since the last map invocation.
|
* The number of bases we've skipped over in the reference since the last map invocation.
|
||||||
|
|
@ -202,4 +204,21 @@ public class AlignmentContext {
|
||||||
public long getSkippedBases() {
|
public long getSkippedBases() {
|
||||||
return skippedBases;
|
return skippedBases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a method for injecting the current locus overflow tracker into the alignment context.
|
||||||
|
* @param state
|
||||||
|
*/
|
||||||
|
public void setLocusOverflowTracker(LocusOverflowTracker state) {
|
||||||
|
this.tracker = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* have we exceeded the maximum pileup at the current locus?
|
||||||
|
* @return true if we have, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean hasExceededMaxPileup() {
|
||||||
|
if (this.tracker == null) return false;
|
||||||
|
return this.tracker.inDroppedRegion(getLocation());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import java.util.ArrayList;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
/**
|
/**
|
||||||
|
|
@ -75,6 +76,12 @@ public class AllLocusView extends LocusView {
|
||||||
return createEmptyLocus( currentPosition );
|
return createEmptyLocus( currentPosition );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||||
|
// we don't hold a overflow tracker
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a blank locus context at the specified location.
|
* Creates a blank locus context at the specified location.
|
||||||
* @param site Site at which to create the blank locus context.
|
* @param site Site at which to create the blank locus context.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
/**
|
/**
|
||||||
|
|
@ -37,4 +38,10 @@ public class CoveredLocusView extends LocusView {
|
||||||
public AlignmentContext next() {
|
public AlignmentContext next() {
|
||||||
return nextLocus();
|
return nextLocus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||||
|
// we don't store a tracker
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,9 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
else
|
else
|
||||||
nextLocus = null;
|
nextLocus = null;
|
||||||
|
|
||||||
|
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
||||||
|
if ((this.loci != null))
|
||||||
|
current.setLocusOverflowTracker(loci.getLocusOverflowTracker());
|
||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.MergingIterator;
|
import org.broadinstitute.sting.utils.MergingIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -129,7 +129,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
lastLoc = site;
|
lastLoc = site;
|
||||||
return new AlignmentContext(site, new ReadBackedPileup(site), skippedBases);
|
return new AlignmentContext(site, new ReadBackedPileup(site), skippedBases);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||||
|
// we don't have an overflow tracker
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
private RefMetaDataTracker createTracker( Collection<RODRecordList<ReferenceOrderedDatum>> allTracksHere ) {
|
private RefMetaDataTracker createTracker( Collection<RODRecordList<ReferenceOrderedDatum>> allTracksHere ) {
|
||||||
RefMetaDataTracker t = new RefMetaDataTracker();
|
RefMetaDataTracker t = new RefMetaDataTracker();
|
||||||
for ( RODRecordList<ReferenceOrderedDatum> track : allTracksHere ) {
|
for ( RODRecordList<ReferenceOrderedDatum> track : allTracksHere ) {
|
||||||
|
|
|
||||||
|
|
@ -29,4 +29,12 @@ public abstract class LocusIterator implements Iterable<AlignmentContext>, Close
|
||||||
public void remove() {
|
public void remove() {
|
||||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a method for getting the overflow tracker, which is used to track sites at which the read count exceeds the
|
||||||
|
* pile-up threshold set on the command line
|
||||||
|
*
|
||||||
|
* @return the overflow tracker, null if no tracker exists
|
||||||
|
*/
|
||||||
|
public abstract LocusOverflowTracker getLocusOverflowTracker();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -449,6 +449,10 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// printState();
|
// printState();
|
||||||
//}
|
//}
|
||||||
GenomeLoc location = null;
|
GenomeLoc location = null;
|
||||||
|
|
||||||
|
// the farthest right a read extends
|
||||||
|
Integer rightMostEnd = -1;
|
||||||
|
|
||||||
int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation
|
int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation
|
||||||
while (it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
SAMRecord read = it.next();
|
SAMRecord read = it.next();
|
||||||
|
|
@ -464,13 +468,17 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
curSize++;
|
curSize++;
|
||||||
if (state.hadIndel()) hasExtendedEvents = true;
|
if (state.hadIndel()) hasExtendedEvents = true;
|
||||||
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
|
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
|
||||||
} else if (location == null) {
|
} else {
|
||||||
location = GenomeLocParser.createGenomeLoc(read);
|
if (location == null)
|
||||||
|
location = GenomeLocParser.createGenomeLoc(read);
|
||||||
|
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if (location != null) overflowTracker.exceeded(location, curSize);
|
if (location != null)
|
||||||
|
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
|
||||||
|
curSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
// fast testing of position
|
// fast testing of position
|
||||||
|
|
@ -523,89 +531,11 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* a method for getting the overflow tracker, for testing
|
* a method for getting the overflow tracker
|
||||||
* @return
|
* @return the overflow tracker, null if none exists
|
||||||
*/
|
*/
|
||||||
protected LocusOverflowTracker getLocusOverflowTracker() {
|
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||||
return this.overflowTracker;
|
return this.overflowTracker;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* a helper class that organizes the output of warning messages from read pile-ups that
|
|
||||||
* are greater than the max pile-up size. We only want a single warning from each non-contigous
|
|
||||||
* interval, up until the maximum warning limit.
|
|
||||||
*/
|
|
||||||
class LocusOverflowTracker {
|
|
||||||
// the last location we found a warning at
|
|
||||||
protected GenomeLoc lastLocation = null;
|
|
||||||
|
|
||||||
// the maximum warning count, and the number of warnings emitted
|
|
||||||
protected static int warningsEmitted = 0;
|
|
||||||
public static final int MAX_WARNINGS = 100;
|
|
||||||
|
|
||||||
// our maximum pileup size
|
|
||||||
protected final int maxPileupSize;
|
|
||||||
|
|
||||||
// do we have a pending warning?
|
|
||||||
protected boolean warningInQueue = false;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create a LocusOverflowTracker
|
|
||||||
*
|
|
||||||
* @param maxPileup the maximum allowed pile-up size
|
|
||||||
*/
|
|
||||||
public LocusOverflowTracker(int maxPileup) {
|
|
||||||
warningInQueue = false;
|
|
||||||
maxPileupSize = maxPileup;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* have we exceeded the maximum pile-up size?
|
|
||||||
*
|
|
||||||
* @param loc the current location
|
|
||||||
* @param pileupSize the pile-up size
|
|
||||||
*
|
|
||||||
* @return return true if we're greater, false if we're not
|
|
||||||
*/
|
|
||||||
public boolean exceeded(GenomeLoc loc, int pileupSize) {
|
|
||||||
boolean exceeded = pileupSize >= maxPileupSize;
|
|
||||||
if (exceeded && warningsEmitted <= MAX_WARNINGS) {
|
|
||||||
if (lastLocation == null) lastLocation = loc;
|
|
||||||
else if (lastLocation.contiguousP(loc)) {
|
|
||||||
lastLocation = lastLocation.merge(loc);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
warnUser();
|
|
||||||
lastLocation = loc;
|
|
||||||
}
|
|
||||||
warningInQueue = true;
|
|
||||||
} else if (warningInQueue) {
|
|
||||||
warnUser();
|
|
||||||
lastLocation = null;
|
|
||||||
}
|
|
||||||
return exceeded;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* clean up the warning queue, making sure we haven't stored a warning
|
|
||||||
* that hasn't been emitted yet.
|
|
||||||
*/
|
|
||||||
public void cleanWarningQueue() {
|
|
||||||
if (warningInQueue) warnUser();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
|
|
||||||
protected void warnUser() {
|
|
||||||
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
|
|
||||||
warningInQueue = false;
|
|
||||||
if (warningsEmitted < MAX_WARNINGS) {
|
|
||||||
warningsEmitted++;
|
|
||||||
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation);
|
|
||||||
} else if (warningsEmitted == MAX_WARNINGS) {
|
|
||||||
warningsEmitted++;
|
|
||||||
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation +
|
|
||||||
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a helper class that organizes the output of warning messages from read pile-ups that
|
||||||
|
* are greater than the max pile-up size. We only want a single warning from each non-contigous
|
||||||
|
* interval, up until the maximum warning limit.
|
||||||
|
*
|
||||||
|
* cleanWarningQueue() must be called when you're finished with the LocusOverflowTracker to make
|
||||||
|
* sure that no errors are left in the queue.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class LocusOverflowTracker {
|
||||||
|
// the last interval we emitted a warning for
|
||||||
|
protected GenomeLoc lastLocation = null;
|
||||||
|
|
||||||
|
// the maximum warning count, and the number of warnings emitted
|
||||||
|
protected static int warningsEmitted = 0;
|
||||||
|
public static final int MAX_WARNINGS = 100;
|
||||||
|
|
||||||
|
// our maximum pileup size
|
||||||
|
protected final int maxPileupSize;
|
||||||
|
|
||||||
|
// do we have a pending warning?
|
||||||
|
protected boolean warningInQueue = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a LocusOverflowTracker
|
||||||
|
*
|
||||||
|
* @param maxPileup the maximum allowed pile-up size
|
||||||
|
*/
|
||||||
|
public LocusOverflowTracker(int maxPileup) {
|
||||||
|
warningInQueue = false;
|
||||||
|
maxPileupSize = maxPileup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* have we exceeded the maximum pile-up size?
|
||||||
|
*
|
||||||
|
* @param loc the current location
|
||||||
|
* @param pileupSize the pile-up size
|
||||||
|
*
|
||||||
|
* @return return true if we're greater, false if we're not
|
||||||
|
*/
|
||||||
|
public boolean exceeded(GenomeLoc loc, int pileupSize) {
|
||||||
|
boolean exceeded = pileupSize >= maxPileupSize;
|
||||||
|
if (exceeded) {
|
||||||
|
|
||||||
|
// if the last location is null, we're starting a new region that exceeds max_reads_at_locus
|
||||||
|
if (lastLocation == null) lastLocation = loc;
|
||||||
|
// are we contiguous to the last genome loc?
|
||||||
|
else if (lastLocation.contiguousP(loc)) {
|
||||||
|
lastLocation = lastLocation.merge(loc);
|
||||||
|
}
|
||||||
|
// we have an existing region, and the current is not contiguous. Emit the old and store the new
|
||||||
|
else {
|
||||||
|
warnUser();
|
||||||
|
lastLocation = loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
// regardless, we have a warning in the queue
|
||||||
|
warningInQueue = true;
|
||||||
|
}
|
||||||
|
// we don't have a warning at this, but there is one in the queue
|
||||||
|
else if (warningInQueue) {
|
||||||
|
warnUser();
|
||||||
|
lastLocation = null;
|
||||||
|
}
|
||||||
|
// return true if we exceeded the max size at this location
|
||||||
|
return exceeded;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* clean up the warning queue, making sure we haven't stored a warning
|
||||||
|
* that hasn't been emitted yet.
|
||||||
|
*/
|
||||||
|
public void cleanWarningQueue() {
|
||||||
|
if (warningInQueue) warnUser();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** warn the user, checking to make sure we haven't exceeded the maximum warning level. */
|
||||||
|
protected void warnUser() {
|
||||||
|
|
||||||
|
// make sure we have a warning in the queue
|
||||||
|
if (!warningInQueue) throw new IllegalStateException("Without a warning in the queue, we shouldn't see a call to warnUser()");
|
||||||
|
|
||||||
|
// reset the warning light
|
||||||
|
warningInQueue = false;
|
||||||
|
|
||||||
|
// check to see if we've meet our warning threshold or not. If we're equal to the threshold emit a message saying this
|
||||||
|
// is the last warning they'll see
|
||||||
|
if (warningsEmitted < MAX_WARNINGS) {
|
||||||
|
warningsEmitted++;
|
||||||
|
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation);
|
||||||
|
} else if (warningsEmitted == MAX_WARNINGS) {
|
||||||
|
warningsEmitted++;
|
||||||
|
Utils.warnUser("Unable to add a reads to the pile-up, we're over the hanger limit of " + maxPileupSize + " at location: " + lastLocation +
|
||||||
|
"; the maximum warning count has been reached, we will no longer emit warnings of this nature!!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* is the specified location in the current exceeded pileup region
|
||||||
|
* @param position the position
|
||||||
|
* @return true if we're in that region
|
||||||
|
*/
|
||||||
|
public boolean inDroppedRegion(GenomeLoc position) {
|
||||||
|
if (lastLocation == null || position == null) return false;
|
||||||
|
return position.overlapsP(lastLocation) ? true : false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -260,11 +260,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
if ( !BaseUtils.isRegularBase(ref) )
|
if ( !BaseUtils.isRegularBase(ref) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
|
||||||
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
|
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
|
||||||
if ( rawPileup.size() == getToolkit().getArguments().readMaxPileup )
|
if ( rawContext.hasExceededMaxPileup() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
||||||
|
|
||||||
// filter the context based on min base and mapping qualities
|
// filter the context based on min base and mapping qualities
|
||||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="FixBAMSortOrder">
|
|
||||||
<executable name="FixBAMSortOrder">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- The walker -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.FixBAMSortOrderTag" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
Loading…
Reference in New Issue