Merge pull request #863 from broadinstitute/kc_m2_initial_commit

Seeking comments on visibility changes to HaplotypeCaller-related classes

Welcome to GATK-master, MuTect2!
This commit is contained in:
Geraldine Van der Auwera 2015-03-13 21:05:39 -04:00
commit 517320092c
5 changed files with 30 additions and 19 deletions

View File

@ -72,7 +72,7 @@ import java.util.*;
* *
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org> * @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/ */
class ActiveRegionTrimmer { public class ActiveRegionTrimmer {
/** /**
* Genome location parser used in order to create and manipulate genomic intervals. * Genome location parser used in order to create and manipulate genomic intervals.
@ -115,11 +115,11 @@ class ActiveRegionTrimmer {
*/ */
@Hidden @Hidden
@Argument(fullName="paddingAroundIndels", shortName="paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required=false) @Argument(fullName="paddingAroundIndels", shortName="paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required=false)
protected int indelPadding = 150; public int indelPadding = 150;
@Hidden @Hidden
@Argument(fullName="paddingAroundSNPs", shortName="paddingAroundSNPs", doc = "Include at least this many bases around an event for calling snps", required=false) @Argument(fullName="paddingAroundSNPs", shortName="paddingAroundSNPs", doc = "Include at least this many bases around an event for calling snps", required=false)
protected int snpPadding = 20; public int snpPadding = 20;
/** /**
* Holds a reference to the trimmer logger. * Holds a reference to the trimmer logger.
@ -143,7 +143,7 @@ class ActiveRegionTrimmer {
* @throws IllegalArgumentException if the input location parser is {@code null}. * @throws IllegalArgumentException if the input location parser is {@code null}.
* @throws UserException.BadArgumentValue if any of the user argument values is invalid. * @throws UserException.BadArgumentValue if any of the user argument values is invalid.
*/ */
void initialize(final GenomeLocParser glp, final boolean debug, final boolean isGGA, final boolean emitReferenceConfidence) { public void initialize(final GenomeLocParser glp, final boolean debug, final boolean isGGA, final boolean emitReferenceConfidence) {
if (locParser != null) if (locParser != null)
throw new IllegalStateException(getClass().getSimpleName() + " instance initialized twice"); throw new IllegalStateException(getClass().getSimpleName() + " instance initialized twice");
if (glp == null) if (glp == null)

View File

@ -1360,7 +1360,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
return splitReadsBySample(samplesList, reads); return splitReadsBySample(samplesList, reads);
} }
private static Map<String, List<GATKSAMRecord>> splitReadsBySample( final SampleList samplesList, final Collection<GATKSAMRecord> reads ) { public static Map<String, List<GATKSAMRecord>> splitReadsBySample( final SampleList samplesList, final Collection<GATKSAMRecord> reads ) {
final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<>(); final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<>();
final int sampleCount = samplesList.sampleCount(); final int sampleCount = samplesList.sampleCount();
for (int i = 0; i < sampleCount; i++) for (int i = 0; i < sampleCount; i++)

View File

@ -64,11 +64,11 @@ public class HaplotypeCallerArgumentCollection extends StandardCallerArgumentCol
@Advanced @Advanced
@Argument(fullName="debug", shortName="debug", doc="Print out very verbose debug information about each triggering active region", required = false) @Argument(fullName="debug", shortName="debug", doc="Print out very verbose debug information about each triggering active region", required = false)
protected boolean DEBUG; public boolean DEBUG;
@Advanced @Advanced
@Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "Use the contamination-filtered read maps for the purposes of annotating variants", required=false) @Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "Use the contamination-filtered read maps for the purposes of annotating variants", required=false)
protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; public boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false;
/** /**
* The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference. * The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.

View File

@ -78,7 +78,7 @@ import java.util.*;
*/ */
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> { public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
private static final int ALLELE_EXTENSION = 2; protected static final int ALLELE_EXTENSION = 2;
private static final String phase01 = "0|1"; private static final String phase01 = "0|1";
private static final String phase10 = "1|0"; private static final String phase10 = "1|0";
@ -139,7 +139,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
private final List<VariantContext> calls; private final List<VariantContext> calls;
private final Set<Haplotype> calledHaplotypes; private final Set<Haplotype> calledHaplotypes;
protected CalledHaplotypes(final List<VariantContext> calls, final Set<Haplotype> calledHaplotypes) { public CalledHaplotypes(final List<VariantContext> calls, final Set<Haplotype> calledHaplotypes) {
if ( calls == null ) throw new IllegalArgumentException("calls cannot be null"); if ( calls == null ) throw new IllegalArgumentException("calls cannot be null");
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null"); if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
if ( Utils.xor(calls.isEmpty(), calledHaplotypes.isEmpty()) ) if ( Utils.xor(calls.isEmpty(), calledHaplotypes.isEmpty()) )
@ -531,7 +531,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection // Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
// used for genotyping. // used for genotyping.
private ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation( protected ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(
final ReadLikelihoods<Haplotype> readHaplotypeLikelihoods, final ReadLikelihoods<Haplotype> readHaplotypeLikelihoods,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList, final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final GenomeLocParser genomeLocParser, final GenomeLocParser genomeLocParser,
@ -596,7 +596,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @param activeAllelesToGenotype alleles we want to ensure are scheduled for genotyping (GGA mode) * @param activeAllelesToGenotype alleles we want to ensure are scheduled for genotyping (GGA mode)
* @return never {@code null} but perhaps an empty list if there are no variants to report. * @return never {@code null} but perhaps an empty list if there are no variants to report.
*/ */
private TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes, protected TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
final ReadLikelihoods readLikelihoods, final ReadLikelihoods readLikelihoods,
final byte[] ref, final byte[] ref,
final GenomeLoc refLoc, final GenomeLoc refLoc,
@ -628,13 +628,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @param vcs a list of variant contexts * @param vcs a list of variant contexts
* @return the list of the sources of vcs in the same order * @return the list of the sources of vcs in the same order
*/ */
private List<String> makePriorityList(final List<VariantContext> vcs) { protected List<String> makePriorityList(final List<VariantContext> vcs) {
final List<String> priorityList = new LinkedList<>(); final List<String> priorityList = new LinkedList<>();
for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource()); for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource());
return priorityList; return priorityList;
} }
private List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes, protected List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes,
final int loc, final int loc,
final List<VariantContext> activeAllelesToGenotype) { final List<VariantContext> activeAllelesToGenotype) {
// the overlapping events to merge into a common reference view // the overlapping events to merge into a common reference view
@ -687,7 +687,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*/ */
@Requires({"readLikelihoods!= null", "mergedVC != null"}) @Requires({"readLikelihoods!= null", "mergedVC != null"})
@Ensures("result != null") @Ensures("result != null")
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) { protected GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) {
final List<Allele> vcAlleles = mergedVC.getAlleles(); final List<Allele> vcAlleles = mergedVC.getAlleles();
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles); final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods)); final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));

View File

@ -129,11 +129,22 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
public enum PCR_ERROR_MODEL { public enum PCR_ERROR_MODEL {
/** no specialized PCR error model will be applied; if base insertion/deletion qualities are present they will be used */ /** no specialized PCR error model will be applied; if base insertion/deletion qualities are present they will be used */
NONE, NONE(null),
/** the most aggressive model will be applied that sacrifices true positives in order to remove more false positives */
HOSTILE(1.0),
/** a more aggressive model will be applied that sacrifices true positives in order to remove more false positives */ /** a more aggressive model will be applied that sacrifices true positives in order to remove more false positives */
AGGRESSIVE, AGGRESSIVE(2.0),
/** a less aggressive model will be applied that tries to maintain a high true positive rate at the expense of allowing more false positives */ /** a less aggressive model will be applied that tries to maintain a high true positive rate at the expense of allowing more false positives */
CONSERVATIVE CONSERVATIVE(3.0);
private final Double rateFactor;
/** rate factor is applied to the PCR error model. Can be null to imply no correction */
PCR_ERROR_MODEL(Double rateFactor) {
this.rateFactor = rateFactor;
}
private Double getRateFactor() { return rateFactor; }
private boolean hasRateFactor() { return rateFactor != null; }
} }
private final PCR_ERROR_MODEL pcrErrorModel; private final PCR_ERROR_MODEL pcrErrorModel;
@ -421,14 +432,14 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
private final RepeatCovariate repeatCovariate = new RepeatLengthCovariate(); private final RepeatCovariate repeatCovariate = new RepeatLengthCovariate();
private void initializePCRErrorModel() { private void initializePCRErrorModel() {
if ( pcrErrorModel == PCR_ERROR_MODEL.NONE ) if ( pcrErrorModel == PCR_ERROR_MODEL.NONE || !pcrErrorModel.hasRateFactor() )
return; return;
repeatCovariate.initialize(MAX_STR_UNIT_LENGTH, MAX_REPEAT_LENGTH); repeatCovariate.initialize(MAX_STR_UNIT_LENGTH, MAX_REPEAT_LENGTH);
pcrIndelErrorModelCache = new byte[MAX_REPEAT_LENGTH + 1]; pcrIndelErrorModelCache = new byte[MAX_REPEAT_LENGTH + 1];
final double rateFactor = pcrErrorModel == PCR_ERROR_MODEL.AGGRESSIVE ? 2.0 : 3.0; final double rateFactor = pcrErrorModel.getRateFactor();
for( int iii = 0; iii <= MAX_REPEAT_LENGTH; iii++ ) for( int iii = 0; iii <= MAX_REPEAT_LENGTH; iii++ )
pcrIndelErrorModelCache[iii] = getErrorModelAdjustedQual(iii, rateFactor); pcrIndelErrorModelCache[iii] = getErrorModelAdjustedQual(iii, rateFactor);