Merge pull request #863 from broadinstitute/kc_m2_initial_commit
Seeking comments on visibility changes to HaplotypeCaller-related classes. Welcome to GATK-master, MuTect2!
This commit is contained in:
commit
517320092c
|
|
@ -72,7 +72,7 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
||||||
*/
|
*/
|
||||||
class ActiveRegionTrimmer {
|
public class ActiveRegionTrimmer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Genome location parser used to create and manipulate genomic intervals.
|
* Genome location parser used to create and manipulate genomic intervals.
|
||||||
|
|
@ -115,11 +115,11 @@ class ActiveRegionTrimmer {
|
||||||
*/
|
*/
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="paddingAroundIndels", shortName="paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required=false)
|
@Argument(fullName="paddingAroundIndels", shortName="paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required=false)
|
||||||
protected int indelPadding = 150;
|
public int indelPadding = 150;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="paddingAroundSNPs", shortName="paddingAroundSNPs", doc = "Include at least this many bases around an event for calling snps", required=false)
|
@Argument(fullName="paddingAroundSNPs", shortName="paddingAroundSNPs", doc = "Include at least this many bases around an event for calling snps", required=false)
|
||||||
protected int snpPadding = 20;
|
public int snpPadding = 20;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds a reference to the trimmer logger.
|
* Holds a reference to the trimmer logger.
|
||||||
|
|
@ -143,7 +143,7 @@ class ActiveRegionTrimmer {
|
||||||
* @throws IllegalArgumentException if the input location parser is {@code null}.
|
* @throws IllegalArgumentException if the input location parser is {@code null}.
|
||||||
* @throws UserException.BadArgumentValue if any of the user argument values is invalid.
|
* @throws UserException.BadArgumentValue if any of the user argument values is invalid.
|
||||||
*/
|
*/
|
||||||
void initialize(final GenomeLocParser glp, final boolean debug, final boolean isGGA, final boolean emitReferenceConfidence) {
|
public void initialize(final GenomeLocParser glp, final boolean debug, final boolean isGGA, final boolean emitReferenceConfidence) {
|
||||||
if (locParser != null)
|
if (locParser != null)
|
||||||
throw new IllegalStateException(getClass().getSimpleName() + " instance initialized twice");
|
throw new IllegalStateException(getClass().getSimpleName() + " instance initialized twice");
|
||||||
if (glp == null)
|
if (glp == null)
|
||||||
|
|
|
||||||
|
|
@ -1360,7 +1360,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
return splitReadsBySample(samplesList, reads);
|
return splitReadsBySample(samplesList, reads);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String, List<GATKSAMRecord>> splitReadsBySample( final SampleList samplesList, final Collection<GATKSAMRecord> reads ) {
|
public static Map<String, List<GATKSAMRecord>> splitReadsBySample( final SampleList samplesList, final Collection<GATKSAMRecord> reads ) {
|
||||||
final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<>();
|
final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<>();
|
||||||
final int sampleCount = samplesList.sampleCount();
|
final int sampleCount = samplesList.sampleCount();
|
||||||
for (int i = 0; i < sampleCount; i++)
|
for (int i = 0; i < sampleCount; i++)
|
||||||
|
|
|
||||||
|
|
@ -64,11 +64,11 @@ public class HaplotypeCallerArgumentCollection extends StandardCallerArgumentCol
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName="debug", shortName="debug", doc="Print out very verbose debug information about each triggering active region", required = false)
|
@Argument(fullName="debug", shortName="debug", doc="Print out very verbose debug information about each triggering active region", required = false)
|
||||||
protected boolean DEBUG;
|
public boolean DEBUG;
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "Use the contamination-filtered read maps for the purposes of annotating variants", required=false)
|
@Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "Use the contamination-filtered read maps for the purposes of annotating variants", required=false)
|
||||||
protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false;
|
public boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.
|
* The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
|
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
|
||||||
|
|
||||||
private static final int ALLELE_EXTENSION = 2;
|
protected static final int ALLELE_EXTENSION = 2;
|
||||||
private static final String phase01 = "0|1";
|
private static final String phase01 = "0|1";
|
||||||
private static final String phase10 = "1|0";
|
private static final String phase10 = "1|0";
|
||||||
|
|
||||||
|
|
@ -139,7 +139,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
||||||
private final List<VariantContext> calls;
|
private final List<VariantContext> calls;
|
||||||
private final Set<Haplotype> calledHaplotypes;
|
private final Set<Haplotype> calledHaplotypes;
|
||||||
|
|
||||||
protected CalledHaplotypes(final List<VariantContext> calls, final Set<Haplotype> calledHaplotypes) {
|
public CalledHaplotypes(final List<VariantContext> calls, final Set<Haplotype> calledHaplotypes) {
|
||||||
if ( calls == null ) throw new IllegalArgumentException("calls cannot be null");
|
if ( calls == null ) throw new IllegalArgumentException("calls cannot be null");
|
||||||
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
|
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
|
||||||
if ( Utils.xor(calls.isEmpty(), calledHaplotypes.isEmpty()) )
|
if ( Utils.xor(calls.isEmpty(), calledHaplotypes.isEmpty()) )
|
||||||
|
|
@ -531,7 +531,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
||||||
|
|
||||||
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
|
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
|
||||||
// used for genotyping.
|
// used for genotyping.
|
||||||
private ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(
|
protected ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(
|
||||||
final ReadLikelihoods<Haplotype> readHaplotypeLikelihoods,
|
final ReadLikelihoods<Haplotype> readHaplotypeLikelihoods,
|
||||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||||
final GenomeLocParser genomeLocParser,
|
final GenomeLocParser genomeLocParser,
|
||||||
|
|
@ -596,7 +596,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
||||||
* @param activeAllelesToGenotype alleles we want to ensure are scheduled for genotyping (GGA mode)
|
* @param activeAllelesToGenotype alleles we want to ensure are scheduled for genotyping (GGA mode)
|
||||||
* @return never {@code null} but perhaps an empty list if there are no variants to report.
|
* @return never {@code null} but perhaps an empty list if there are no variants to report.
|
||||||
*/
|
*/
|
||||||
private TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
|
protected TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
|
||||||
final ReadLikelihoods readLikelihoods,
|
final ReadLikelihoods readLikelihoods,
|
||||||
final byte[] ref,
|
final byte[] ref,
|
||||||
final GenomeLoc refLoc,
|
final GenomeLoc refLoc,
|
||||||
|
|
@ -628,13 +628,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
||||||
* @param vcs a list of variant contexts
|
* @param vcs a list of variant contexts
|
||||||
* @return the list of the sources of vcs in the same order
|
* @return the list of the sources of vcs in the same order
|
||||||
*/
|
*/
|
||||||
private List<String> makePriorityList(final List<VariantContext> vcs) {
|
protected List<String> makePriorityList(final List<VariantContext> vcs) {
|
||||||
final List<String> priorityList = new LinkedList<>();
|
final List<String> priorityList = new LinkedList<>();
|
||||||
for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource());
|
for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource());
|
||||||
return priorityList;
|
return priorityList;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes,
|
protected List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes,
|
||||||
final int loc,
|
final int loc,
|
||||||
final List<VariantContext> activeAllelesToGenotype) {
|
final List<VariantContext> activeAllelesToGenotype) {
|
||||||
// the overlapping events to merge into a common reference view
|
// the overlapping events to merge into a common reference view
|
||||||
|
|
@ -687,7 +687,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
||||||
*/
|
*/
|
||||||
@Requires({"readLikelihoods!= null", "mergedVC != null"})
|
@Requires({"readLikelihoods!= null", "mergedVC != null"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) {
|
protected GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) {
|
||||||
final List<Allele> vcAlleles = mergedVC.getAlleles();
|
final List<Allele> vcAlleles = mergedVC.getAlleles();
|
||||||
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
|
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
|
||||||
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
|
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
|
||||||
|
|
|
||||||
|
|
@ -129,11 +129,22 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
|
||||||
|
|
||||||
public enum PCR_ERROR_MODEL {
|
public enum PCR_ERROR_MODEL {
|
||||||
/** no specialized PCR error model will be applied; if base insertion/deletion qualities are present they will be used */
|
/** no specialized PCR error model will be applied; if base insertion/deletion qualities are present they will be used */
|
||||||
NONE,
|
NONE(null),
|
||||||
|
/** the most aggressive model will be applied that sacrifices true positives in order to remove more false positives */
|
||||||
|
HOSTILE(1.0),
|
||||||
/** a more aggressive model will be applied that sacrifices true positives in order to remove more false positives */
|
/** a more aggressive model will be applied that sacrifices true positives in order to remove more false positives */
|
||||||
AGGRESSIVE,
|
AGGRESSIVE(2.0),
|
||||||
/** a less aggressive model will be applied that tries to maintain a high true positive rate at the expense of allowing more false positives */
|
/** a less aggressive model will be applied that tries to maintain a high true positive rate at the expense of allowing more false positives */
|
||||||
CONSERVATIVE
|
CONSERVATIVE(3.0);
|
||||||
|
|
||||||
|
private final Double rateFactor;
|
||||||
|
|
||||||
|
/** rate factor is applied to the PCR error model. Can be null to imply no correction */
|
||||||
|
PCR_ERROR_MODEL(Double rateFactor) {
|
||||||
|
this.rateFactor = rateFactor;
|
||||||
|
}
|
||||||
|
private Double getRateFactor() { return rateFactor; }
|
||||||
|
private boolean hasRateFactor() { return rateFactor != null; }
|
||||||
}
|
}
|
||||||
|
|
||||||
private final PCR_ERROR_MODEL pcrErrorModel;
|
private final PCR_ERROR_MODEL pcrErrorModel;
|
||||||
|
|
@ -421,14 +432,14 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
|
||||||
private final RepeatCovariate repeatCovariate = new RepeatLengthCovariate();
|
private final RepeatCovariate repeatCovariate = new RepeatLengthCovariate();
|
||||||
|
|
||||||
private void initializePCRErrorModel() {
|
private void initializePCRErrorModel() {
|
||||||
if ( pcrErrorModel == PCR_ERROR_MODEL.NONE )
|
if ( pcrErrorModel == PCR_ERROR_MODEL.NONE || !pcrErrorModel.hasRateFactor() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
repeatCovariate.initialize(MAX_STR_UNIT_LENGTH, MAX_REPEAT_LENGTH);
|
repeatCovariate.initialize(MAX_STR_UNIT_LENGTH, MAX_REPEAT_LENGTH);
|
||||||
|
|
||||||
pcrIndelErrorModelCache = new byte[MAX_REPEAT_LENGTH + 1];
|
pcrIndelErrorModelCache = new byte[MAX_REPEAT_LENGTH + 1];
|
||||||
|
|
||||||
final double rateFactor = pcrErrorModel == PCR_ERROR_MODEL.AGGRESSIVE ? 2.0 : 3.0;
|
final double rateFactor = pcrErrorModel.getRateFactor();
|
||||||
|
|
||||||
for( int iii = 0; iii <= MAX_REPEAT_LENGTH; iii++ )
|
for( int iii = 0; iii <= MAX_REPEAT_LENGTH; iii++ )
|
||||||
pcrIndelErrorModelCache[iii] = getErrorModelAdjustedQual(iii, rateFactor);
|
pcrIndelErrorModelCache[iii] = getErrorModelAdjustedQual(iii, rateFactor);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue