added a flag, maximum_reads_at_locus, shortName "mrl", which limits the number of reads we add to the locusByHanger. In some bam files misalignment produces pile-ups of 750K or more reads. We now limit this to the default of 100K reads.
The user is warned if a locus exceeds this threshold, and no more reads are added. Also CombineDup walker had an incorrect package name. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1496 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0addae967a
commit
4a1d79cd7b
|
|
@ -121,6 +121,10 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
|
||||
public Boolean unsafe = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus. 100,000 by default.", required = false)
|
||||
public int readMaxPileup = 100000;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
||||
public Boolean disableThreading = false;
|
||||
|
|
@ -241,6 +245,9 @@ public class GATKArgumentCollection {
|
|||
if (!other.unsafe.equals(this.unsafe)) {
|
||||
return false;
|
||||
}
|
||||
if (other.readMaxPileup != this.readMaxPileup) {
|
||||
return false;
|
||||
}
|
||||
if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) ||
|
||||
( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -357,7 +357,8 @@ public class GenomeAnalysisEngine {
|
|||
argCollection.downsampleFraction,
|
||||
argCollection.downsampleCoverage,
|
||||
!argCollection.unsafe,
|
||||
filters );
|
||||
filters,
|
||||
argCollection.readMaxPileup);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
|
|
@ -30,6 +30,7 @@ public class Reads {
|
|||
private Integer downsampleToCoverage = null;
|
||||
private Boolean beSafe = null;
|
||||
private List<SamRecordFilter> supplementalFilters = null;
|
||||
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
||||
|
||||
/**
|
||||
* Gets a list of the files acting as sources of reads.
|
||||
|
|
@ -63,6 +64,14 @@ public class Reads {
|
|||
return downsampleToCoverage;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the maximum number of reads we allow at a locus for locus-by-hanger
|
||||
* @return the maximum reads allowed in a pile-up
|
||||
*/
|
||||
public Integer getMaxReadsAtLocus() {
|
||||
return maximumReadsAtLocus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether to 'verify' the reads as we pass through them.
|
||||
* @return Whether to verify the reads.
|
||||
|
|
@ -100,12 +109,14 @@ public class Reads {
|
|||
Double downsampleFraction,
|
||||
Integer downsampleCoverage,
|
||||
Boolean beSafe,
|
||||
List<SamRecordFilter> supplementalFilters ) {
|
||||
List<SamRecordFilter> supplementalFilters,
|
||||
int maximumReadsAtLocus) {
|
||||
this.readsFiles = samFiles;
|
||||
this.validationStringency = strictness;
|
||||
this.downsamplingFraction = downsampleFraction;
|
||||
this.downsampleToCoverage = downsampleCoverage;
|
||||
this.beSafe = beSafe;
|
||||
this.supplementalFilters = supplementalFilters;
|
||||
this.maximumReadsAtLocus = maximumReadsAtLocus;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,20 +1,19 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Collection;
|
||||
import java.util.Arrays;
|
||||
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 13, 2009
|
||||
|
|
@ -60,7 +59,7 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
|
||||
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
||||
|
||||
this.loci = new LocusIteratorByHanger(reads);
|
||||
this.loci = new LocusIteratorByHanger(reads, sourceInfo);
|
||||
seedNextLocus();
|
||||
|
||||
provider.register(this);
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.iterators;
|
|||
import net.sf.samtools.AlignmentBlock;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -58,14 +59,15 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
final int INCREMENT_SIZE = 100;
|
||||
final boolean DEBUG = false;
|
||||
boolean justCleared = false;
|
||||
|
||||
private Reads readInfo;
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors and other basic operations
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
public LocusIteratorByHanger(final Iterator<SAMRecord> samIterator) {
|
||||
public LocusIteratorByHanger(final Iterator<SAMRecord> samIterator, Reads readInformation) {
|
||||
this.it = new PushbackIterator<SAMRecord>(samIterator);
|
||||
this.readInfo = readInformation;
|
||||
}
|
||||
|
||||
public Iterator<AlignmentContext> iterator() {
|
||||
|
|
@ -107,7 +109,7 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
// -----------------------------------------------------------------------------------------------------------------
|
||||
public AlignmentContext next() {
|
||||
if ( ! currentPositionIsFullyCovered() )
|
||||
expandWindow(INCREMENT_SIZE);
|
||||
expandWindow(INCREMENT_SIZE,readInfo.getMaxReadsAtLocus());
|
||||
|
||||
if ( DEBUG ) {
|
||||
logger.debug("in Next:");
|
||||
|
|
@ -167,7 +169,7 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
private final void expandWindow(final int incrementSize) {
|
||||
private final void expandWindow(final int incrementSize, final int maximumPileupSize) {
|
||||
// todo: reenable
|
||||
if ( false && incrementSize != 1 ) {
|
||||
Utils.scareUser(String.format("BUG: IncrementSize=%d != 1, the codebase doesn't support this extension strategy yet", incrementSize));
|
||||
|
|
@ -177,7 +179,7 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
logger.debug(String.format("entering expandWindow..., hasNext=%b", it.hasNext()));
|
||||
printState();
|
||||
}
|
||||
|
||||
boolean warned = false; // warn them once per locus
|
||||
while ( it.hasNext() ) {
|
||||
if ( DEBUG ) {
|
||||
logger.debug(String.format("Expanding window"));
|
||||
|
|
@ -204,7 +206,15 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
break;
|
||||
}
|
||||
else
|
||||
hangRead(read);
|
||||
// check to see if we've exceeded the maximum number of reads in the pile-up
|
||||
if (readHanger.size() < maximumPileupSize)
|
||||
hangRead(read);
|
||||
else {
|
||||
// if we haven't warned the user for this locus, do so now
|
||||
if (!warned)
|
||||
Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maximumPileupSize + " at location " + readHanger.getLeftLoc());
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.duplicates;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ public abstract class CommandLineProgram {
|
|||
shortName="l",
|
||||
doc="Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. (DEBUG, INFO, WARN, ERROR, FATAL, OFF). ",
|
||||
required=false)
|
||||
protected String logging_level = "ERROR";
|
||||
protected String logging_level = "WARN";
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -302,24 +302,23 @@ public abstract class CommandLineProgram {
|
|||
* level that was provided.
|
||||
*/
|
||||
private void setupLoggerLevel() {
|
||||
|
||||
Level par = Level.WARN;
|
||||
if (logging_level.equals("DEBUG")) {
|
||||
if (logging_level.toUpperCase().equals("DEBUG")) {
|
||||
par = Level.DEBUG;
|
||||
}
|
||||
else if (logging_level.equals("ERROR")) {
|
||||
else if (logging_level.toUpperCase().equals("ERROR")) {
|
||||
par = Level.ERROR;
|
||||
}
|
||||
else if (logging_level.equals("FATAL")) {
|
||||
else if (logging_level.toUpperCase().equals("FATAL")) {
|
||||
par = Level.FATAL;
|
||||
}
|
||||
else if (logging_level.equals("INFO")) {
|
||||
else if (logging_level.toUpperCase().equals("INFO")) {
|
||||
par = Level.INFO;
|
||||
}
|
||||
else if (logging_level.equals("WARN")) {
|
||||
else if (logging_level.toUpperCase().equals("WARN")) {
|
||||
par = Level.WARN;
|
||||
}
|
||||
else if (logging_level.equals("OFF")) {
|
||||
else if (logging_level.toUpperCase().equals("OFF")) {
|
||||
par = Level.OFF;
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
Loading…
Reference in New Issue