From 4a1d79cd7b871f9def003383cca5e10199a050c0 Mon Sep 17 00:00:00 2001 From: aaron Date: Tue, 1 Sep 2009 04:21:58 +0000 Subject: [PATCH] added a flag, maximum_reads_at_locus, shortName "mrl", which limits the number of reads we add to the locusByHanger. In some bam files misalignment produces pile-ups of 750K or more reads. We now limit this to the default of 100K reads. The user is warned if a locus exceeds this threshold, and no more reads are added. Also CombineDup walker had an incorrect package name. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1496 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GATKArgumentCollection.java | 7 +++++ .../sting/gatk/GenomeAnalysisEngine.java | 3 ++- .../org/broadinstitute/sting/gatk/Reads.java | 23 +++++++++++----- .../gatk/datasources/providers/LocusView.java | 27 +++++++++---------- .../gatk/iterators/LocusIteratorByHanger.java | 22 ++++++++++----- .../duplicates/CombineDuplicatesWalker.java | 2 +- .../utils/cmdLine/CommandLineProgram.java | 15 +++++------ 7 files changed, 63 insertions(+), 36 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index 3d583fd59..beb54e03f 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -121,6 +121,10 @@ public class GATKArgumentCollection { @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false) public Boolean unsafe = false; + @Element(required = false) + @Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus. 100,000 by default.", required = false) + public int readMaxPileup = 100000; + @Element(required = false) @Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false) public Boolean disableThreading = false; @@ -241,6 +245,9 @@ public class GATKArgumentCollection { if (!other.unsafe.equals(this.unsafe)) { return false; } + if (other.readMaxPileup != this.readMaxPileup) { + return false; + } if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) || ( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) { return false; diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 705696836..888dd35c8 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -357,7 +357,8 @@ public class GenomeAnalysisEngine { argCollection.downsampleFraction, argCollection.downsampleCoverage, !argCollection.unsafe, - filters ); + filters, + argCollection.readMaxPileup); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index 89c89fe44..1e09d2c75 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk; -import java.io.File; -import java.util.List; -import java.util.ArrayList; - -import net.sf.samtools.SAMFileReader; import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMFileReader; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; /** * User: hanna * Date: May 14, 2009 @@ -30,6 +30,7 @@ public class Reads { private Integer downsampleToCoverage = null; private Boolean beSafe = null; private List supplementalFilters = null; + private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT /** * Gets a list of the files acting as sources of reads. @@ -63,6 +64,14 @@ public class Reads { return downsampleToCoverage; } + /** + * get the maximum number of reads we allow at a locus for locus-by-hanger + * @return the maximum reads allowed in a pile-up + */ + public Integer getMaxReadsAtLocus() { + return maximumReadsAtLocus; + } + /** * Return whether to 'verify' the reads as we pass through them. * @return Whether to verify the reads. @@ -100,12 +109,14 @@ public class Reads { Double downsampleFraction, Integer downsampleCoverage, Boolean beSafe, - List supplementalFilters ) { + List supplementalFilters, + int maximumReadsAtLocus) { this.readsFiles = samFiles; this.validationStringency = strictness; this.downsamplingFraction = downsampleFraction; this.downsampleToCoverage = downsampleCoverage; this.beSafe = beSafe; this.supplementalFilters = supplementalFilters; + this.maximumReadsAtLocus = maximumReadsAtLocus; } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 977baf0f9..612d03e76 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -1,20 +1,19 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger; -import org.broadinstitute.sting.gatk.iterators.LocusIterator; -import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; -import net.sf.samtools.SAMRecord; - -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.Collection; -import java.util.Arrays; - import net.sf.picard.filter.FilteringIterator; import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger; +import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; /** * User: hanna * Date: May 13, 2009 @@ -60,7 +59,7 @@ public abstract class LocusView extends LocusIterator implements View { Iterator reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc()); this.sourceInfo = provider.getReadIterator().getSourceInfo(); - this.loci = new LocusIteratorByHanger(reads); + this.loci = new LocusIteratorByHanger(reads, sourceInfo); seedNextLocus(); provider.register(this); diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java index cde8cca80..41f73c081 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.AlignmentBlock; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -58,14 +59,15 @@ public class LocusIteratorByHanger extends LocusIterator { final int INCREMENT_SIZE = 100; final boolean DEBUG = false; boolean justCleared = false; - + private Reads readInfo; // ----------------------------------------------------------------------------------------------------------------- // // constructors and other basic operations // // ----------------------------------------------------------------------------------------------------------------- - public LocusIteratorByHanger(final Iterator samIterator) { + public LocusIteratorByHanger(final Iterator samIterator, Reads readInformation) { this.it = new PushbackIterator(samIterator); + this.readInfo = readInformation; } public Iterator iterator() { @@ -107,7 +109,7 @@ public class LocusIteratorByHanger extends LocusIterator { // ----------------------------------------------------------------------------------------------------------------- public AlignmentContext next() { if ( ! currentPositionIsFullyCovered() ) - expandWindow(INCREMENT_SIZE); + expandWindow(INCREMENT_SIZE,readInfo.getMaxReadsAtLocus()); if ( DEBUG ) { logger.debug("in Next:"); @@ -167,7 +169,7 @@ public class LocusIteratorByHanger extends LocusIterator { } } - private final void expandWindow(final int incrementSize) { + private final void expandWindow(final int incrementSize, final int maximumPileupSize) { // todo: reenable if ( false && incrementSize != 1 ) { Utils.scareUser(String.format("BUG: IncrementSize=%d != 1, the codebase doesn't support this extension strategy yet", incrementSize)); @@ -177,7 +179,7 @@ public class LocusIteratorByHanger extends LocusIterator { logger.debug(String.format("entering expandWindow..., hasNext=%b", it.hasNext())); printState(); } - + boolean warned = false; // warn them once per locus while ( it.hasNext() ) { if ( DEBUG ) { logger.debug(String.format("Expanding window")); @@ -204,7 +206,15 @@ public class LocusIteratorByHanger extends LocusIterator { break; } else - hangRead(read); + // check to see if we've exceeded the maximum number of reads in the pile-up + if (readHanger.size() < maximumPileupSize) + hangRead(read); + else { + // if we haven't warned the user for this locus, do so now + if (!warned) + Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maximumPileupSize + " at location " + readHanger.getLeftLoc()); + warned = true; + } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java index 894e63f1f..4631761dd 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.playground.gatk.walkers; +package org.broadinstitute.sting.playground.gatk.walkers.duplicates; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java index 7923ebbaf..dce94ccc5 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java @@ -46,7 +46,7 @@ public abstract class CommandLineProgram { shortName="l", doc="Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. (DEBUG, INFO, WARN, ERROR, FATAL, OFF). ", required=false) - protected String logging_level = "ERROR"; + protected String logging_level = "WARN"; /** @@ -302,24 +302,23 @@ public abstract class CommandLineProgram { * level that was provided. */ private void setupLoggerLevel() { - Level par = Level.WARN; - if (logging_level.equals("DEBUG")) { + if (logging_level.toUpperCase().equals("DEBUG")) { par = Level.DEBUG; } - else if (logging_level.equals("ERROR")) { + else if (logging_level.toUpperCase().equals("ERROR")) { par = Level.ERROR; } - else if (logging_level.equals("FATAL")) { + else if (logging_level.toUpperCase().equals("FATAL")) { par = Level.FATAL; } - else if (logging_level.equals("INFO")) { + else if (logging_level.toUpperCase().equals("INFO")) { par = Level.INFO; } - else if (logging_level.equals("WARN")) { + else if (logging_level.toUpperCase().equals("WARN")) { par = Level.WARN; } - else if (logging_level.equals("OFF")) { + else if (logging_level.toUpperCase().equals("OFF")) { par = Level.OFF; } else {