added a flag, maximum_reads_at_locus, shortName "mrl", which limits the number of reads we add to the locusByHanger. In some bam files misalignment produces pile-ups of 750K or more reads. We now limit this to the default of 100K reads.

The user is warned if a locus exceeds this threshold, and no more reads are added.

Also CombineDup walker had an incorrect package name.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1496 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-01 04:21:58 +00:00
parent 0addae967a
commit 4a1d79cd7b
7 changed files with 63 additions and 36 deletions

View File

@ -121,6 +121,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
public Boolean unsafe = false;
@Element(required = false)
@Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus. 100,000 by default.", required = false)
public int readMaxPileup = 100000;
@Element(required = false)
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
public Boolean disableThreading = false;
@ -241,6 +245,9 @@ public class GATKArgumentCollection {
if (!other.unsafe.equals(this.unsafe)) {
return false;
}
if (other.readMaxPileup != this.readMaxPileup) {
return false;
}
if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) ||
( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) {
return false;

View File

@ -357,7 +357,8 @@ public class GenomeAnalysisEngine {
argCollection.downsampleFraction,
argCollection.downsampleCoverage,
!argCollection.unsafe,
filters );
filters,
argCollection.readMaxPileup);
}
/**

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk;
import java.io.File;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMFileReader;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileReader;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* User: hanna
* Date: May 14, 2009
@ -30,6 +30,7 @@ public class Reads {
private Integer downsampleToCoverage = null;
private Boolean beSafe = null;
private List<SamRecordFilter> supplementalFilters = null;
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
/**
* Gets a list of the files acting as sources of reads.
@ -63,6 +64,14 @@ public class Reads {
return downsampleToCoverage;
}
/**
* get the maximum number of reads we allow at a locus for locus-by-hanger
* @return the maximum reads allowed in a pile-up
*/
public Integer getMaxReadsAtLocus() {
return maximumReadsAtLocus;
}
/**
* Return whether to 'verify' the reads as we pass through them.
* @return Whether to verify the reads.
@ -100,12 +109,14 @@ public class Reads {
Double downsampleFraction,
Integer downsampleCoverage,
Boolean beSafe,
List<SamRecordFilter> supplementalFilters ) {
List<SamRecordFilter> supplementalFilters,
int maximumReadsAtLocus) {
this.readsFiles = samFiles;
this.validationStringency = strictness;
this.downsamplingFraction = downsampleFraction;
this.downsampleToCoverage = downsampleCoverage;
this.beSafe = beSafe;
this.supplementalFilters = supplementalFilters;
this.maximumReadsAtLocus = maximumReadsAtLocus;
}
}

View File

@ -1,20 +1,19 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Collection;
import java.util.Arrays;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
* User: hanna
* Date: May 13, 2009
@ -60,7 +59,7 @@ public abstract class LocusView extends LocusIterator implements View {
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
this.sourceInfo = provider.getReadIterator().getSourceInfo();
this.loci = new LocusIteratorByHanger(reads);
this.loci = new LocusIteratorByHanger(reads, sourceInfo);
seedNextLocus();
provider.register(this);

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.AlignmentBlock;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -58,14 +59,15 @@ public class LocusIteratorByHanger extends LocusIterator {
final int INCREMENT_SIZE = 100;
final boolean DEBUG = false;
boolean justCleared = false;
private Reads readInfo;
// -----------------------------------------------------------------------------------------------------------------
//
// constructors and other basic operations
//
// -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByHanger(final Iterator<SAMRecord> samIterator) {
public LocusIteratorByHanger(final Iterator<SAMRecord> samIterator, Reads readInformation) {
this.it = new PushbackIterator<SAMRecord>(samIterator);
this.readInfo = readInformation;
}
public Iterator<AlignmentContext> iterator() {
@ -107,7 +109,7 @@ public class LocusIteratorByHanger extends LocusIterator {
// -----------------------------------------------------------------------------------------------------------------
public AlignmentContext next() {
if ( ! currentPositionIsFullyCovered() )
expandWindow(INCREMENT_SIZE);
expandWindow(INCREMENT_SIZE,readInfo.getMaxReadsAtLocus());
if ( DEBUG ) {
logger.debug("in Next:");
@ -167,7 +169,7 @@ public class LocusIteratorByHanger extends LocusIterator {
}
}
private final void expandWindow(final int incrementSize) {
private final void expandWindow(final int incrementSize, final int maximumPileupSize) {
// todo: reenable
if ( false && incrementSize != 1 ) {
Utils.scareUser(String.format("BUG: IncrementSize=%d != 1, the codebase doesn't support this extension strategy yet", incrementSize));
@ -177,7 +179,7 @@ public class LocusIteratorByHanger extends LocusIterator {
logger.debug(String.format("entering expandWindow..., hasNext=%b", it.hasNext()));
printState();
}
boolean warned = false; // warn them once per locus
while ( it.hasNext() ) {
if ( DEBUG ) {
logger.debug(String.format("Expanding window"));
@ -204,7 +206,15 @@ public class LocusIteratorByHanger extends LocusIterator {
break;
}
else
hangRead(read);
// check to see if we've exceeded the maximum number of reads in the pile-up
if (readHanger.size() < maximumPileupSize)
hangRead(read);
else {
// if we haven't warned the user for this locus, do so now
if (!warned)
Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maximumPileupSize + " at location " + readHanger.getLeftLoc());
warned = true;
}
}
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers;
package org.broadinstitute.sting.playground.gatk.walkers.duplicates;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;

View File

@ -46,7 +46,7 @@ public abstract class CommandLineProgram {
shortName="l",
doc="Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. (DEBUG, INFO, WARN, ERROR, FATAL, OFF). ",
required=false)
protected String logging_level = "ERROR";
protected String logging_level = "WARN";
/**
@ -302,24 +302,23 @@ public abstract class CommandLineProgram {
* level that was provided.
*/
private void setupLoggerLevel() {
Level par = Level.WARN;
if (logging_level.equals("DEBUG")) {
if (logging_level.toUpperCase().equals("DEBUG")) {
par = Level.DEBUG;
}
else if (logging_level.equals("ERROR")) {
else if (logging_level.toUpperCase().equals("ERROR")) {
par = Level.ERROR;
}
else if (logging_level.equals("FATAL")) {
else if (logging_level.toUpperCase().equals("FATAL")) {
par = Level.FATAL;
}
else if (logging_level.equals("INFO")) {
else if (logging_level.toUpperCase().equals("INFO")) {
par = Level.INFO;
}
else if (logging_level.equals("WARN")) {
else if (logging_level.toUpperCase().equals("WARN")) {
par = Level.WARN;
}
else if (logging_level.equals("OFF")) {
else if (logging_level.toUpperCase().equals("OFF")) {
par = Level.OFF;
}
else {