Merge pull request #1496 from broadinstitute/db_m2_downsampling
Expose downsampling parameters as CLI arguments to Mutect2
This commit is contained in:
commit
234ccdf0c3
|
|
@ -194,8 +194,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
|
||||||
private double log10GlobalReadMismappingRate;
|
private double log10GlobalReadMismappingRate;
|
||||||
private static final int REFERENCE_PADDING = 500;
|
private static final int REFERENCE_PADDING = 500;
|
||||||
private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6;
|
private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6;
|
||||||
private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument
|
|
||||||
private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument
|
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected M2ArgumentCollection MTAC = new M2ArgumentCollection();
|
protected M2ArgumentCollection MTAC = new M2ArgumentCollection();
|
||||||
|
|
@ -287,6 +285,16 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
|
||||||
@Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="Disable physical phasing", required = false)
|
@Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="Disable physical phasing", required = false)
|
||||||
protected boolean doNotRunPhysicalPhasing = false;
|
protected boolean doNotRunPhysicalPhasing = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When downsampling, level the coverage of the reads in each sample to no more than maxReadsInRegionPerSample reads,
|
||||||
|
* not reducing coverage at any read start to less than minReadsPerAlignmentStart
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "maxReadsInRegionPerSample", shortName = "maxReadsInRegionPerSample", doc="Maximum reads in an active region", required = false)
|
||||||
|
protected int maxReadsInRegionPerSample = 1000;
|
||||||
|
|
||||||
|
@Argument(fullName = "minReadsPerAlignmentStart", shortName = "minReadsPerAlignStart", doc="Minimum number of reads sharing the same alignment start for each genomic location in an active region", required = false)
|
||||||
|
protected int minReadsPerAlignmentStart = 5;
|
||||||
|
|
||||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return MTAC.dbsnp.dbsnp; }
|
public RodBinding<VariantContext> getDbsnpRodBinding() { return MTAC.dbsnp.dbsnp; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -162,8 +162,8 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), this.walker.maxProbPropagationDistance, this.walker.activeProbThreshold,
|
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), this.walker.maxProbPropagationDistance, this.walker.activeProbThreshold,
|
||||||
BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||||
|
|
||||||
final int maxReadsAcrossSamples = annotation.maxReadsToHoldInMemoryPerSample() * ReadUtils.getSAMFileSamples(engine.getSAMFileHeader()).size();
|
final int maxReadsAcrossSamples = this.walker.maxReadsInMemoryPerSample * ReadUtils.getSAMFileSamples(engine.getSAMFileHeader()).size();
|
||||||
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, annotation.maxReadsToHoldTotal());
|
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, this.walker.maxTotalReadsInMemory);
|
||||||
myReads = new TAROrderedReadCache(maxReadsToHoldInMemory);
|
myReads = new TAROrderedReadCache(maxReadsToHoldInMemory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,20 +78,4 @@ public @interface ActiveRegionTraversalParameters {
|
||||||
* @return the breadth of the band pass gaussian kernel we want for our traversal
|
* @return the breadth of the band pass gaussian kernel we want for our traversal
|
||||||
*/
|
*/
|
||||||
public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA;
|
public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA;
|
||||||
|
|
||||||
/**
|
|
||||||
* What is the maximum number of reads we're willing to hold in memory per sample
|
|
||||||
* during the traversal? This limits our exposure to unusually large amounts
|
|
||||||
* of coverage in the engine.
|
|
||||||
* @return the maximum number of reads we're willing to hold in memory
|
|
||||||
*/
|
|
||||||
public int maxReadsToHoldInMemoryPerSample() default 30000;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* No matter what the per sample value says, we will never hold more than this
|
|
||||||
* number of reads in memory at any time. Provides an upper bound on the total number
|
|
||||||
* of reads in the case where we have a lot of samples.
|
|
||||||
* @return the maximum number of reads to hold in memory
|
|
||||||
*/
|
|
||||||
public int maxReadsToHoldTotal() default 10000000;
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -108,6 +108,24 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
|
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
|
||||||
public Double bandPassSigma = null;
|
public Double bandPassSigma = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What is the maximum number of reads we're willing to hold in memory per sample
|
||||||
|
* during the traversal? This limits our exposure to unusually large amounts
|
||||||
|
* of coverage in the engine.
|
||||||
|
*/
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="maxReadsInMemoryPerSample", shortName="maxReadsInMemoryPerSample", doc="Maximum reads per sample given to traversal map() function", required = false)
|
||||||
|
public int maxReadsInMemoryPerSample = 30000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What is the maximum number of reads we're willing to hold in memory per sample
|
||||||
|
* during the traversal? This limits our exposure to unusually large amounts
|
||||||
|
* of coverage in the engine.
|
||||||
|
*/
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="maxTotalReadsInMemory", shortName="maxTotalReadsInMemory", doc="Maximum total reads given to traversal map() function", required = false)
|
||||||
|
public int maxTotalReadsInMemory = 10000000;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For active region limits in ActivityProfile
|
* For active region limits in ActivityProfile
|
||||||
* */
|
* */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue