Add downsampling arguments to Mutect
- maxReadsInRegionPErSample, minReadsPerAlignment start are now M2 args - maxReadsInMemoryPerSample is an activeRegionWalker arg
This commit is contained in:
parent
66aa735817
commit
b1f2301dd3
|
|
@ -200,8 +200,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
|
|||
private double log10GlobalReadMismappingRate;
|
||||
private static final int REFERENCE_PADDING = 500;
|
||||
private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6;
|
||||
private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument
|
||||
private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument
|
||||
|
||||
@ArgumentCollection
|
||||
protected M2ArgumentCollection MTAC = new M2ArgumentCollection();
|
||||
|
|
@ -293,6 +291,16 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
|
|||
@Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="Disable physical phasing", required = false)
|
||||
protected boolean doNotRunPhysicalPhasing = false;
|
||||
|
||||
/**
|
||||
* When downsampling, level the coverage of the reads in each sample to no more than maxReadsInRegionPerSample reads,
|
||||
* not reducing coverage at any read start to less than minReadsPerAlignmentStart
|
||||
*/
|
||||
@Argument(fullName = "maxReadsInRegionPerSample", shortName = "maxReadsInRegionPerSample", doc="Maximum reads in an active region", required = false)
|
||||
protected int maxReadsInRegionPerSample = 1000;
|
||||
|
||||
@Argument(fullName = "minReadsPerAlignmentStart", shortName = "minReadsPerAlignStart", doc="Minimum number of reads sharing the same alignment start for each genomic location in an active region", required = false)
|
||||
protected int minReadsPerAlignmentStart = 5;
|
||||
|
||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return MTAC.dbsnp.dbsnp; }
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -162,8 +162,8 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), this.walker.maxProbPropagationDistance, this.walker.activeProbThreshold,
|
||||
BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||
|
||||
final int maxReadsAcrossSamples = annotation.maxReadsToHoldInMemoryPerSample() * ReadUtils.getSAMFileSamples(engine.getSAMFileHeader()).size();
|
||||
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, annotation.maxReadsToHoldTotal());
|
||||
final int maxReadsAcrossSamples = this.walker.maxReadsInMemoryPerSample * ReadUtils.getSAMFileSamples(engine.getSAMFileHeader()).size();
|
||||
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, this.walker.maxTotalReadsInMemory);
|
||||
myReads = new TAROrderedReadCache(maxReadsToHoldInMemory);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -78,20 +78,4 @@ public @interface ActiveRegionTraversalParameters {
|
|||
* @return the breadth of the band pass gaussian kernel we want for our traversal
|
||||
*/
|
||||
public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA;
|
||||
|
||||
/**
|
||||
* What is the maximum number of reads we're willing to hold in memory per sample
|
||||
* during the traversal? This limits our exposure to unusually large amounts
|
||||
* of coverage in the engine.
|
||||
* @return the maximum number of reads we're willing to hold in memory
|
||||
*/
|
||||
public int maxReadsToHoldInMemoryPerSample() default 30000;
|
||||
|
||||
/**
|
||||
* No matter what the per sample value says, we will never hold more than this
|
||||
* number of reads in memory at any time. Provides an upper bound on the total number
|
||||
* of reads in the case where we have a lot of samples.
|
||||
* @return the maximum number of reads to hold in memory
|
||||
*/
|
||||
public int maxReadsToHoldTotal() default 10000000;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,6 +108,24 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
|
||||
public Double bandPassSigma = null;
|
||||
|
||||
/**
|
||||
* What is the maximum number of reads we're willing to hold in memory per sample
|
||||
* during the traversal? This limits our exposure to unusually large amounts
|
||||
* of coverage in the engine.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="maxReadsInMemoryPerSample", shortName="maxReadsInMemoryPerSample", doc="Maximum reads per sample given to traversal map() function", required = false)
|
||||
public int maxReadsInMemoryPerSample = 30000;
|
||||
|
||||
/**
|
||||
* What is the maximum number of reads we're willing to hold in memory per sample
|
||||
* during the traversal? This limits our exposure to unusually large amounts
|
||||
* of coverage in the engine.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="maxTotalReadsInMemory", shortName="maxTotalReadsInMemory", doc="Maximum total reads given to traversal map() function", required = false)
|
||||
public int maxTotalReadsInMemory = 10000000;
|
||||
|
||||
/*
|
||||
* For active region limits in ActivityProfile
|
||||
* */
|
||||
|
|
|
|||
Loading…
Reference in New Issue