added a flag, maximum_reads_at_locus, shortName "mrl", which limits the number of reads we add to the locusByHanger. In some bam files misalignment produces pile-ups of 750K or more reads. We now limit this to the default of 100K reads.

The user is warned if a locus exceeds this threshold, and no more reads are added.

Also CombineDup walker had an incorrect package name.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1496 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-01 04:21:58 +00:00
parent 0addae967a
commit 4a1d79cd7b
7 changed files with 63 additions and 36 deletions

View File

@ -121,6 +121,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
public Boolean unsafe = false; public Boolean unsafe = false;
@Element(required = false)
@Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus. 100,000 by default.", required = false)
public int readMaxPileup = 100000;
@Element(required = false) @Element(required = false)
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false) @Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
public Boolean disableThreading = false; public Boolean disableThreading = false;
@ -241,6 +245,9 @@ public class GATKArgumentCollection {
if (!other.unsafe.equals(this.unsafe)) { if (!other.unsafe.equals(this.unsafe)) {
return false; return false;
} }
if (other.readMaxPileup != this.readMaxPileup) {
return false;
}
if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) || if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) ||
( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) { ( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) {
return false; return false;

View File

@ -357,7 +357,8 @@ public class GenomeAnalysisEngine {
argCollection.downsampleFraction, argCollection.downsampleFraction,
argCollection.downsampleCoverage, argCollection.downsampleCoverage,
!argCollection.unsafe, !argCollection.unsafe,
filters ); filters,
argCollection.readMaxPileup);
} }
/** /**

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk; package org.broadinstitute.sting.gatk;
import java.io.File;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMFileReader;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileReader;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/** /**
* User: hanna * User: hanna
* Date: May 14, 2009 * Date: May 14, 2009
@ -30,6 +30,7 @@ public class Reads {
private Integer downsampleToCoverage = null; private Integer downsampleToCoverage = null;
private Boolean beSafe = null; private Boolean beSafe = null;
private List<SamRecordFilter> supplementalFilters = null; private List<SamRecordFilter> supplementalFilters = null;
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
/** /**
* Gets a list of the files acting as sources of reads. * Gets a list of the files acting as sources of reads.
@ -63,6 +64,14 @@ public class Reads {
return downsampleToCoverage; return downsampleToCoverage;
} }
/**
* get the maximum number of reads we allow at a locus for locus-by-hanger
* @return the maximum reads allowed in a pile-up
*/
public Integer getMaxReadsAtLocus() {
return maximumReadsAtLocus;
}
/** /**
* Return whether to 'verify' the reads as we pass through them. * Return whether to 'verify' the reads as we pass through them.
* @return Whether to verify the reads. * @return Whether to verify the reads.
@ -100,12 +109,14 @@ public class Reads {
Double downsampleFraction, Double downsampleFraction,
Integer downsampleCoverage, Integer downsampleCoverage,
Boolean beSafe, Boolean beSafe,
List<SamRecordFilter> supplementalFilters ) { List<SamRecordFilter> supplementalFilters,
int maximumReadsAtLocus) {
this.readsFiles = samFiles; this.readsFiles = samFiles;
this.validationStringency = strictness; this.validationStringency = strictness;
this.downsamplingFraction = downsampleFraction; this.downsamplingFraction = downsampleFraction;
this.downsampleToCoverage = downsampleCoverage; this.downsampleToCoverage = downsampleCoverage;
this.beSafe = beSafe; this.beSafe = beSafe;
this.supplementalFilters = supplementalFilters; this.supplementalFilters = supplementalFilters;
this.maximumReadsAtLocus = maximumReadsAtLocus;
} }
} }

View File

@ -1,20 +1,19 @@
package org.broadinstitute.sting.gatk.datasources.providers; package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Collection;
import java.util.Arrays;
import net.sf.picard.filter.FilteringIterator; import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;
/** /**
* User: hanna * User: hanna
* Date: May 13, 2009 * Date: May 13, 2009
@ -60,7 +59,7 @@ public abstract class LocusView extends LocusIterator implements View {
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc()); Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
this.sourceInfo = provider.getReadIterator().getSourceInfo(); this.sourceInfo = provider.getReadIterator().getSourceInfo();
this.loci = new LocusIteratorByHanger(reads); this.loci = new LocusIteratorByHanger(reads, sourceInfo);
seedNextLocus(); seedNextLocus();
provider.register(this); provider.register(this);

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.AlignmentBlock; import net.sf.samtools.AlignmentBlock;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -58,14 +59,15 @@ public class LocusIteratorByHanger extends LocusIterator {
final int INCREMENT_SIZE = 100; final int INCREMENT_SIZE = 100;
final boolean DEBUG = false; final boolean DEBUG = false;
boolean justCleared = false; boolean justCleared = false;
private Reads readInfo;
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
// //
// constructors and other basic operations // constructors and other basic operations
// //
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByHanger(final Iterator<SAMRecord> samIterator) { public LocusIteratorByHanger(final Iterator<SAMRecord> samIterator, Reads readInformation) {
this.it = new PushbackIterator<SAMRecord>(samIterator); this.it = new PushbackIterator<SAMRecord>(samIterator);
this.readInfo = readInformation;
} }
public Iterator<AlignmentContext> iterator() { public Iterator<AlignmentContext> iterator() {
@ -107,7 +109,7 @@ public class LocusIteratorByHanger extends LocusIterator {
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
public AlignmentContext next() { public AlignmentContext next() {
if ( ! currentPositionIsFullyCovered() ) if ( ! currentPositionIsFullyCovered() )
expandWindow(INCREMENT_SIZE); expandWindow(INCREMENT_SIZE,readInfo.getMaxReadsAtLocus());
if ( DEBUG ) { if ( DEBUG ) {
logger.debug("in Next:"); logger.debug("in Next:");
@ -167,7 +169,7 @@ public class LocusIteratorByHanger extends LocusIterator {
} }
} }
private final void expandWindow(final int incrementSize) { private final void expandWindow(final int incrementSize, final int maximumPileupSize) {
// todo: reenable // todo: reenable
if ( false && incrementSize != 1 ) { if ( false && incrementSize != 1 ) {
Utils.scareUser(String.format("BUG: IncrementSize=%d != 1, the codebase doesn't support this extension strategy yet", incrementSize)); Utils.scareUser(String.format("BUG: IncrementSize=%d != 1, the codebase doesn't support this extension strategy yet", incrementSize));
@ -177,7 +179,7 @@ public class LocusIteratorByHanger extends LocusIterator {
logger.debug(String.format("entering expandWindow..., hasNext=%b", it.hasNext())); logger.debug(String.format("entering expandWindow..., hasNext=%b", it.hasNext()));
printState(); printState();
} }
boolean warned = false; // warn them once per locus
while ( it.hasNext() ) { while ( it.hasNext() ) {
if ( DEBUG ) { if ( DEBUG ) {
logger.debug(String.format("Expanding window")); logger.debug(String.format("Expanding window"));
@ -204,7 +206,15 @@ public class LocusIteratorByHanger extends LocusIterator {
break; break;
} }
else else
// check to see if we've exceeded the maximum number of reads in the pile-up
if (readHanger.size() < maximumPileupSize)
hangRead(read); hangRead(read);
else {
// if we haven't warned the user for this locus, do so now
if (!warned)
Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maximumPileupSize + " at location " + readHanger.getLeftLoc());
warned = true;
}
} }
} }

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers; package org.broadinstitute.sting.playground.gatk.walkers.duplicates;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;

View File

@ -46,7 +46,7 @@ public abstract class CommandLineProgram {
shortName="l", shortName="l",
doc="Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. (DEBUG, INFO, WARN, ERROR, FATAL, OFF). ", doc="Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. (DEBUG, INFO, WARN, ERROR, FATAL, OFF). ",
required=false) required=false)
protected String logging_level = "ERROR"; protected String logging_level = "WARN";
/** /**
@ -302,24 +302,23 @@ public abstract class CommandLineProgram {
* level that was provided. * level that was provided.
*/ */
private void setupLoggerLevel() { private void setupLoggerLevel() {
Level par = Level.WARN; Level par = Level.WARN;
if (logging_level.equals("DEBUG")) { if (logging_level.toUpperCase().equals("DEBUG")) {
par = Level.DEBUG; par = Level.DEBUG;
} }
else if (logging_level.equals("ERROR")) { else if (logging_level.toUpperCase().equals("ERROR")) {
par = Level.ERROR; par = Level.ERROR;
} }
else if (logging_level.equals("FATAL")) { else if (logging_level.toUpperCase().equals("FATAL")) {
par = Level.FATAL; par = Level.FATAL;
} }
else if (logging_level.equals("INFO")) { else if (logging_level.toUpperCase().equals("INFO")) {
par = Level.INFO; par = Level.INFO;
} }
else if (logging_level.equals("WARN")) { else if (logging_level.toUpperCase().equals("WARN")) {
par = Level.WARN; par = Level.WARN;
} }
else if (logging_level.equals("OFF")) { else if (logging_level.toUpperCase().equals("OFF")) {
par = Level.OFF; par = Level.OFF;
} }
else { else {