From ab1053e83c1bb038e3e8820e966b40f3362ef81b Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Mon, 22 Dec 2014 16:46:17 -0500 Subject: [PATCH] It compiles, and produces results! fixed NPE when normal contains no reads first integration test (micro) and unit tests, also rename of MuTectHC -> M2 adding in standard GATK license terms incorporated HOSTILE mode to PCR Error Correction removed tumor and normal name parameters and cleaned up internal name handling changes to allow for calling without a matched normal (technically, not true 'tumor-only' calling). Used for panel-of-normals creation additional regression tests, based on DREAM data. Removed accidental addition of TandemRepeatAnnotator to default annotations updated MD5 based on run from GSA4 to fix bamboo issue reverted unneeded visibility changes --- .../haplotypecaller/ActiveRegionTrimmer.java | 8 +++---- .../haplotypecaller/HaplotypeCaller.java | 2 +- .../HaplotypeCallerArgumentCollection.java | 4 ++-- .../HaplotypeCallerGenotypingEngine.java | 14 ++++++------- .../PairHMMLikelihoodCalculationEngine.java | 21 ++++++++++++++----- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java index 3fa393eb0..6a8564b28 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java @@ -72,7 +72,7 @@ import java.util.*; * * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ -class ActiveRegionTrimmer { +public class ActiveRegionTrimmer { /** * Genome location parser use in order to create and manipulate genomic intervals. @@ -115,11 +115,11 @@ class ActiveRegionTrimmer { */ @Hidden @Argument(fullName="paddingAroundIndels", shortName="paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required=false) - protected int indelPadding = 150; + public int indelPadding = 150; @Hidden @Argument(fullName="paddingAroundSNPs", shortName="paddingAroundSNPs", doc = "Include at least this many bases around an event for calling snps", required=false) - protected int snpPadding = 20; + public int snpPadding = 20; /** * Holds a reference the trimmer logger. @@ -143,7 +143,7 @@ class ActiveRegionTrimmer { * @throws IllegalArgumentException if the input location parser is {@code null}. * @throws UserException.BadArgumentValue if any of the user argument values is invalid. */ - void initialize(final GenomeLocParser glp, final boolean debug, final boolean isGGA, final boolean emitReferenceConfidence) { + public void initialize(final GenomeLocParser glp, final boolean debug, final boolean isGGA, final boolean emitReferenceConfidence) { if (locParser != null) throw new IllegalStateException(getClass().getSimpleName() + " instance initialized twice"); if (glp == null) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 8af31a22c..f5537ee63 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -1360,7 +1360,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In return splitReadsBySample(samplesList, reads); } - private static Map> splitReadsBySample( final SampleList samplesList, final Collection reads ) { + public static Map> splitReadsBySample( final SampleList samplesList, final Collection reads ) { final Map> returnMap = new HashMap<>(); final int sampleCount = samplesList.sampleCount(); for (int i = 0; i < sampleCount; i++) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java index c68245e7f..f9f08df8b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java @@ -64,11 +64,11 @@ public class HaplotypeCallerArgumentCollection extends StandardCallerArgumentCol @Advanced @Argument(fullName="debug", shortName="debug", doc="Print out very verbose debug information about each triggering active region", required = false) - protected boolean DEBUG; + public boolean DEBUG; @Advanced @Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "Use the contamination-filtered read maps for the purposes of annotating variants", required=false) - protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; + public boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; /** * The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference. diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 7d439428b..0c20f4661 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -78,7 +78,7 @@ import java.util.*; */ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine { - private static final int ALLELE_EXTENSION = 2; + protected static final int ALLELE_EXTENSION = 2; private static final String phase01 = "0|1"; private static final String phase10 = "1|0"; @@ -139,7 +139,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine calls; private final Set calledHaplotypes; - protected CalledHaplotypes(final List calls, final Set calledHaplotypes) { + public CalledHaplotypes(final List calls, final Set calledHaplotypes) { if ( calls == null ) throw new IllegalArgumentException("calls cannot be null"); if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null"); if ( Utils.xor(calls.isEmpty(), calledHaplotypes.isEmpty()) ) @@ -531,7 +531,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine prepareReadAlleleLikelihoodsForAnnotation( + protected ReadLikelihoods prepareReadAlleleLikelihoodsForAnnotation( final ReadLikelihoods readHaplotypeLikelihoods, final Map> perSampleFilteredReadList, final GenomeLocParser genomeLocParser, @@ -596,7 +596,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine decomposeHaplotypesIntoVariantContexts(final List haplotypes, + protected TreeSet decomposeHaplotypesIntoVariantContexts(final List haplotypes, final ReadLikelihoods readLikelihoods, final byte[] ref, final GenomeLoc refLoc, @@ -628,13 +628,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine makePriorityList(final List vcs) { + protected List makePriorityList(final List vcs) { final List priorityList = new LinkedList<>(); for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource()); return priorityList; } - private List getVCsAtThisLocation(final List haplotypes, + protected List getVCsAtThisLocation(final List haplotypes, final int loc, final List activeAllelesToGenotype) { // the overlapping events to merge into a common reference view @@ -687,7 +687,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine readLikelihoods, final VariantContext mergedVC, final List noCallAlleles ) { + protected GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods readLikelihoods, final VariantContext mergedVC, final List noCallAlleles ) { final List vcAlleles = mergedVC.getAlleles(); final AlleleList alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles); final GenotypingLikelihoods likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods)); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java index 7b2a5647c..0f04e5139 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java @@ -129,11 +129,22 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula public enum PCR_ERROR_MODEL { /** no specialized PCR error model will be applied; if base insertion/deletion qualities are present they will be used */ - NONE, + NONE(null), + /** a most aggressive model will be applied that sacrifices true positives in order to remove more false positives */ + HOSTILE(1.0), /** a more aggressive model will be applied that sacrifices true positives in order to remove more false positives */ - AGGRESSIVE, + AGGRESSIVE(2.0), /** a less aggressive model will be applied that tries to maintain a high true positive rate at the expense of allowing more false positives */ - CONSERVATIVE + CONSERVATIVE(3.0); + + private final Double rateFactor; + + /** rate factor is applied to the PCR error model. Can be null to imply no correction */ + PCR_ERROR_MODEL(Double rateFactor) { + this.rateFactor = rateFactor; + } + private Double getRateFactor() { return rateFactor; } + private boolean hasRateFactor() { return rateFactor != null; } } private final PCR_ERROR_MODEL pcrErrorModel; @@ -421,14 +432,14 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula private final RepeatCovariate repeatCovariate = new RepeatLengthCovariate(); private void initializePCRErrorModel() { - if ( pcrErrorModel == PCR_ERROR_MODEL.NONE ) + if ( pcrErrorModel == PCR_ERROR_MODEL.NONE || !pcrErrorModel.hasRateFactor() ) return; repeatCovariate.initialize(MAX_STR_UNIT_LENGTH, MAX_REPEAT_LENGTH); pcrIndelErrorModelCache = new byte[MAX_REPEAT_LENGTH + 1]; - final double rateFactor = pcrErrorModel == PCR_ERROR_MODEL.AGGRESSIVE ? 2.0 : 3.0; + final double rateFactor = pcrErrorModel.getRateFactor(); for( int iii = 0; iii <= MAX_REPEAT_LENGTH; iii++ ) pcrIndelErrorModelCache[iii] = getErrorModelAdjustedQual(iii, rateFactor);