Lots of small improvements to Mutect2 code

This commit is contained in:
David Benjamin 2016-08-04 01:50:13 -04:00
parent ebceb06284
commit 01142dfb1c
4 changed files with 446 additions and 599 deletions

View File

@ -51,12 +51,41 @@
package org.broadinstitute.gatk.tools.walkers.cancer.m2; package org.broadinstitute.gatk.tools.walkers.cancer.m2;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Advanced; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Hidden; import java.util.Collections;
import java.util.List;
public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection { public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection {
/***************************************/
// Reference Metadata inputs
/***************************************/
/**
* MuTect2 has the ability to use COSMIC data in conjunction with dbSNP to adjust the threshold for evidence of a variant
* in the normal. If a variant is present in dbSNP, but not in COSMIC, then more evidence is required from the normal
* sample to prove the variant is not present in germline.
*/
@Input(fullName="cosmic", shortName = "cosmic", doc="VCF file of COSMIC sites", required=false)
public List<RodBinding<VariantContext>> cosmicRod = Collections.emptyList();
/**
* A panel of normals can be a useful (optional) input to help filter out commonly seen sequencing noise that may appear as low allele-fraction somatic variants.
*/
@Input(fullName="normal_panel", shortName = "PON", doc="VCF file of sites observed in normal", required=false)
public List<RodBinding<VariantContext>> normalPanelRod = Collections.emptyList();
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP overlap is only used to require more evidence of absence in the normal if the variant in question has been seen before in germline.
*/
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
@Advanced @Advanced
@Argument(fullName="m2debug", shortName="m2debug", doc="Print out very verbose M2 debug information", required = false) @Argument(fullName="m2debug", shortName="m2debug", doc="Print out very verbose M2 debug information", required = false)
public boolean M2_DEBUG = false; public boolean M2_DEBUG = false;

View File

@ -201,7 +201,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
// the genotyping engine // the genotyping engine
protected HaplotypeCallerGenotypingEngine genotypingEngine = null; protected HaplotypeCallerGenotypingEngine genotypingEngine = null;
private byte MIN_TAIL_QUALITY; private byte MIN_TAIL_QUALITY;
private double log10GlobalReadMismappingRate; private double log10GlobalReadMismappingRate;
@ -214,9 +213,8 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
@ArgumentCollection @ArgumentCollection
protected LikelihoodEngineArgumentCollection LEAC = new LikelihoodEngineArgumentCollection(); protected LikelihoodEngineArgumentCollection LEAC = new LikelihoodEngineArgumentCollection();
@Argument(fullName = "debug_read_name", required = false, doc="trace this read name through the calling process") @Argument(fullName = "debug_read_name", required = false, doc="trace this read name through the calling process")
public String DEBUG_READ_NAME = null; protected String DEBUG_READ_NAME = null;
@Hidden @Hidden
@Advanced @Advanced
@ -224,24 +222,81 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
final public int MQthreshold = 20; final public int MQthreshold = 20;
/***************************************/ public RodBinding<VariantContext> getDbsnpRodBinding() { return MTAC.dbsnp.dbsnp; }
// Reference Metadata inputs
/***************************************/
/**
* MuTect2 has the ability to use COSMIC data in conjunction with dbSNP to adjust the threshold for evidence of a variant
* in the normal. If a variant is present in dbSNP, but not in COSMIC, then more evidence is required from the normal
* sample to prove the variant is not present in germline.
*/
@Input(fullName="cosmic", shortName = "cosmic", doc="VCF file of COSMIC sites", required=false)
public List<RodBinding<VariantContext>> cosmicRod = Collections.emptyList();
/**
* A panel of normals can be a useful (optional) input to help filter out commonly seen sequencing noise that may appear as low allele-fraction somatic variants.
*/
@Input(fullName="normal_panel", shortName = "PON", doc="VCF file of sites observed in normal", required=false)
public List<RodBinding<VariantContext>> normalPanelRod = Collections.emptyList();
private HaplotypeBAMWriter haplotypeBAMWriter; private HaplotypeBAMWriter haplotypeBAMWriter;
/**
* If a call overlaps with a record from the provided comp track, the INFO field will be annotated
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
* Records that are filtered in the comp track will be ignored.
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
*/
@Advanced
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
/**
* Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
*/
@Advanced
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"DepthPerAlleleBySample", "BaseQualitySumPerAlleleBySample", "TandemRepeatAnnotator", "OxoGReadCounts"}));
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Advanced
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<>(Arrays.asList(new String[]{"SpanningDeletions"}));
/**
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
*/
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { };
@Output(doc="File to which variants should be written")
protected VariantContextWriter vcfWriter = null;
/**
* Active region trimmer reference.
*/
@ArgumentCollection
protected ActiveRegionTrimmer trimmer = new ActiveRegionTrimmer();
@Hidden
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
protected String keepRG = null;
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public byte MIN_BASE_QUALTY_SCORE = 10;
public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.HOSTILE;
@Hidden
@Argument(fullName="errorCorrectReads", shortName="errorCorrectReads", doc = "Use an exploratory algorithm to error correct the kmers used during assembly. May cause fundamental problems with the assembly graph itself", required=false)
protected boolean errorCorrectReads = false;
@Hidden
@Argument(fullName="captureAssemblyFailureBAM", shortName="captureAssemblyFailureBAM", doc="If specified, we will write a BAM called assemblyFailure.bam capturing all of the reads that were in the active region when the assembler failed for any reason", required = false)
protected boolean captureAssemblyFailureBAM = false;
@Advanced
@Argument(fullName="dontUseSoftClippedBases", shortName="dontUseSoftClippedBases", doc="If specified, we will not analyze soft clipped bases in the reads", required = false)
protected boolean dontUseSoftClippedBases = false;
@Hidden
@Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false)
protected boolean justDetermineActiveRegions = false;
// reference base padding size
private static final int REFERENCE_PADDING = 500;
private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6;
private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument
private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument
@Override @Override
public void initialize() { public void initialize() {
@ -250,15 +305,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))); samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSAMFileSamples(getToolkit().getSAMFileHeader())));
// MUTECT: check that we have at least one tumor bam // MUTECT: check that we have at least one tumor bam
for(SAMReaderID id : getToolkit().getReadsDataSource().getReaderIDs()) { for(final SAMReaderID id : getToolkit().getReadsDataSource().getReaderIDs()) {
if (id.getTags().getPositionalTags().size() == 0) { if (id.getTags().getPositionalTags().size() == 0) {
throw new RuntimeException("BAMs must be tagged as either 'tumor' or 'normal'"); throw new RuntimeException("BAMs must be tagged as either 'tumor' or 'normal'");
} }
// only supports single-sample BAMs (ie first read group is representative) // only supports single-sample BAMs (ie first read group is representative)
String bamSampleName = getToolkit().getReadsDataSource().getHeader(id).getReadGroups().get(0).getSample(); final String bamSampleName = getToolkit().getReadsDataSource().getHeader(id).getReadGroups().get(0).getSample();
for(String tag : id.getTags().getPositionalTags()) { for(final String tag : id.getTags().getPositionalTags()) {
if (BAM_TAG_TUMOR.equalsIgnoreCase(tag)) { if (BAM_TAG_TUMOR.equalsIgnoreCase(tag)) {
tumorSAMReaderIDs.add(id); tumorSAMReaderIDs.add(id);
if (tumorSampleName == null) { if (tumorSampleName == null) {
@ -330,9 +385,9 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
final MergeVariantsAcrossHaplotypes variantMerger = new MergeVariantsAcrossHaplotypes(); final MergeVariantsAcrossHaplotypes variantMerger = new MergeVariantsAcrossHaplotypes();
final GenomeAnalysisEngine toolkit = getToolkit(); final GenomeAnalysisEngine toolkit = getToolkit();
final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();
genotypingEngine = new SomaticGenotypingEngine( MTAC, samplesList, genomeLocParser, FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(), MTAC, logger), !doNotRunPhysicalPhasing, MTAC); genotypingEngine = new SomaticGenotypingEngine( MTAC, samplesList, toolkit.getGenomeLocParser(), !doNotRunPhysicalPhasing, MTAC,
tumorSampleName, normalSampleName, DEBUG_READ_NAME);
genotypingEngine.setCrossHaplotypeEventMerger(variantMerger); genotypingEngine.setCrossHaplotypeEventMerger(variantMerger);
genotypingEngine.setAnnotationEngine(annotationEngine); genotypingEngine.setAnnotationEngine(annotationEngine);
@ -353,7 +408,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
trimmer.snpPadding = 50; trimmer.snpPadding = 50;
samplesList = toolkit.getReadSampleList(); samplesList = toolkit.getReadSampleList();
Set<String> sampleSet = SampleListUtils.asSet(samplesList); final Set<String> sampleSet = SampleListUtils.asSet(samplesList);
if( MTAC.CONTAMINATION_FRACTION_FILE != null ) if( MTAC.CONTAMINATION_FRACTION_FILE != null )
MTAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(MTAC.CONTAMINATION_FRACTION_FILE, MTAC.CONTAMINATION_FRACTION, sampleSet, logger)); MTAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(MTAC.CONTAMINATION_FRACTION_FILE, MTAC.CONTAMINATION_FRACTION, sampleSet, logger));
@ -367,7 +422,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
Set<VCFHeaderLine> headerInfo = new HashSet<>(); final Set<VCFHeaderLine> headerInfo = new HashSet<>();
// all annotation fields from VariantAnnotatorEngine // all annotation fields from VariantAnnotatorEngine
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
@ -383,7 +438,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
headerInfo.addAll(getM2HeaderLines()); headerInfo.addAll(getM2HeaderLines());
headerInfo.addAll(getSampleHeaderLines()); headerInfo.addAll(getSampleHeaderLines());
List<String> outputSampleNames = getOutputSampleNames(); final List<String> outputSampleNames = getOutputSampleNames();
vcfWriter.writeHeader(new VCFHeader(headerInfo, outputSampleNames)); vcfWriter.writeHeader(new VCFHeader(headerInfo, outputSampleNames));
@ -391,7 +446,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
private Set<VCFHeaderLine> getM2HeaderLines(){ private Set<VCFHeaderLine> getM2HeaderLines(){
Set<VCFHeaderLine> headerInfo = new HashSet<>(); final Set<VCFHeaderLine> headerInfo = new HashSet<>();
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NORMAL_LOD_KEY)); headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NORMAL_LOD_KEY));
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.TUMOR_LOD_KEY)); headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.TUMOR_LOD_KEY));
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.PANEL_OF_NORMALS_COUNT_KEY)); headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.PANEL_OF_NORMALS_COUNT_KEY));
@ -430,21 +485,21 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
private Set<VCFHeaderLine> getSampleHeaderLines(){ private Set<VCFHeaderLine> getSampleHeaderLines(){
Set<VCFHeaderLine> sampleLines = new HashSet<>(); final Set<VCFHeaderLine> sampleLines = new HashSet<>();
if (printTCGAsampleHeader) { if (printTCGAsampleHeader) {
//NOTE: This will only list the first bam file for each tumor/normal sample if there is more than one //NOTE: This will only list the first bam file for each tumor/normal sample if there is more than one
Map<String, String> normalSampleHeaderAttributes = new HashMap<>(); final Map<String, String> normalSampleHeaderAttributes = new HashMap<>();
normalSampleHeaderAttributes.put("ID", "NORMAL"); normalSampleHeaderAttributes.put("ID", "NORMAL");
normalSampleHeaderAttributes.put("SampleName", normalSampleName); normalSampleHeaderAttributes.put("SampleName", normalSampleName);
if (normalSAMReaderIDs.iterator().hasNext() && !getToolkit().getArguments().disableCommandLineInVCF) if (normalSAMReaderIDs.iterator().hasNext() && !getToolkit().getArguments().disableCommandLineInVCF)
normalSampleHeaderAttributes.put("File", normalSAMReaderIDs.iterator().next().getSamFilePath()); normalSampleHeaderAttributes.put("File", normalSAMReaderIDs.iterator().next().getSamFilePath());
VCFSimpleHeaderLine normalSampleHeader = new VCFSimpleHeaderLine("SAMPLE", normalSampleHeaderAttributes); final VCFSimpleHeaderLine normalSampleHeader = new VCFSimpleHeaderLine("SAMPLE", normalSampleHeaderAttributes);
Map<String, String> tumorSampleHeaderAttributes = new HashMap<>(); final Map<String, String> tumorSampleHeaderAttributes = new HashMap<>();
tumorSampleHeaderAttributes.put("ID", "TUMOR"); tumorSampleHeaderAttributes.put("ID", "TUMOR");
tumorSampleHeaderAttributes.put("SampleName", tumorSampleName); tumorSampleHeaderAttributes.put("SampleName", tumorSampleName);
if (tumorSAMReaderIDs.iterator().hasNext() && !getToolkit().getArguments().disableCommandLineInVCF) if (tumorSAMReaderIDs.iterator().hasNext() && !getToolkit().getArguments().disableCommandLineInVCF)
tumorSampleHeaderAttributes.put("File", tumorSAMReaderIDs.iterator().next().getSamFilePath()); tumorSampleHeaderAttributes.put("File", tumorSAMReaderIDs.iterator().next().getSamFilePath());
VCFSimpleHeaderLine tumorSampleHeader = new VCFSimpleHeaderLine("SAMPLE", tumorSampleHeaderAttributes); final VCFSimpleHeaderLine tumorSampleHeader = new VCFSimpleHeaderLine("SAMPLE", tumorSampleHeaderAttributes);
sampleLines.add(normalSampleHeader); sampleLines.add(normalSampleHeader);
sampleLines.add(tumorSampleHeader); sampleLines.add(tumorSampleHeader);
@ -455,7 +510,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
private List<String> getOutputSampleNames(){ private List<String> getOutputSampleNames(){
if (printTCGAsampleHeader) { if (printTCGAsampleHeader) {
//Already checked for exactly 1 tumor and 1 normal in printTCGAsampleHeader assignment in initialize() //Already checked for exactly 1 tumor and 1 normal in printTCGAsampleHeader assignment in initialize()
List<String> sampleNamePlaceholders = new ArrayList<>(2); final List<String> sampleNamePlaceholders = new ArrayList<>(2);
sampleNamePlaceholders.add("TUMOR"); sampleNamePlaceholders.add("TUMOR");
sampleNamePlaceholders.add("NORMAL"); sampleNamePlaceholders.add("NORMAL");
return sampleNamePlaceholders; return sampleNamePlaceholders;
@ -466,14 +521,14 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
@Override @Override
public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public ActivityProfileState isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
if( context == null || context.getBasePileup().isEmpty() ) if( context == null || context.getBasePileup().isEmpty() )
// if we don't have any data, just abort early // if we don't have any data, just abort early
return new ActivityProfileState(ref.getLocus(), 0.0); return new ActivityProfileState(ref.getLocus(), 0.0);
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
AlignmentContext tumorContext = splitContexts.get(tumorSampleName); final AlignmentContext tumorContext = splitContexts.get(tumorSampleName);
AlignmentContext normalContext = splitContexts.get(normalSampleName); final AlignmentContext normalContext = splitContexts.get(normalSampleName);
// if there are no tumor reads... there is no activity! // if there are no tumor reads... there is no activity!
if (tumorContext == null) { if (tumorContext == null) {
@ -481,7 +536,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
// KCIBUL -- this method was inlined and modified from ReferenceConfidenceModel // KCIBUL -- this method was inlined and modified from ReferenceConfidenceModel
ReadBackedPileup tumorPileup = tumorContext.getBasePileup().getMappingFilteredPileup(MQthreshold); final ReadBackedPileup tumorPileup = tumorContext.getBasePileup().getMappingFilteredPileup(MQthreshold);
final double[] tumorGLs = calcGenotypeLikelihoodsOfRefVsAny(tumorPileup, ref.getBase(), MIN_BASE_QUALTY_SCORE); final double[] tumorGLs = calcGenotypeLikelihoodsOfRefVsAny(tumorPileup, ref.getBase(), MIN_BASE_QUALTY_SCORE);
final double tumorLod = tumorGLs[1] - tumorGLs[0]; final double tumorLod = tumorGLs[1] - tumorGLs[0];
@ -495,7 +550,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
// in any case, we have to handle the case where there is no normal (and thus no normal context) which is // in any case, we have to handle the case where there is no normal (and thus no normal context) which is
// different than having a normal but having no reads (where we should not enter the active region) // different than having a normal but having no reads (where we should not enter the active region)
if (normalSampleName != null && normalContext != null) { if (normalSampleName != null && normalContext != null) {
int nonRefInNormal = getCountOfNonRefEvents(normalContext.getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE); final int nonRefInNormal = getCountOfNonRefEvents(normalContext.getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE);
final double[] normalGLs = calcGenotypeLikelihoodsOfRefVsAny(normalContext.getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, 0.5f); final double[] normalGLs = calcGenotypeLikelihoodsOfRefVsAny(normalContext.getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, 0.5f);
final double normalLod = normalGLs[0] - normalGLs[1]; final double normalLod = normalGLs[0] - normalGLs[1];
@ -528,13 +583,12 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
// No reads here so nothing to do! // No reads here so nothing to do!
if( originalActiveRegion.size() == 0 ) { return referenceModelForNoVariation(originalActiveRegion, true); } if( originalActiveRegion.size() == 0 ) { return referenceModelForNoVariation(originalActiveRegion, true); }
logReadInfo(DEBUG_READ_NAME, originalActiveRegion.getReads(), "Present in original active region"); logReadInfo(DEBUG_READ_NAME, originalActiveRegion.getReads(), "Present in original active region");
// create the assembly using just high quality reads (Q20 or higher). We want to use lower // create the assembly using just high quality reads (Q20 or higher). We want to use lower
// quality reads in the PairHMM (and especially in the normal) later, so we can't use a ReadFilter // quality reads in the PairHMM (and especially in the normal) later, so we can't use a ReadFilter
ActiveRegion assemblyActiveRegion = new ActiveRegion(originalActiveRegion.getLocation(), originalActiveRegion.getSupportingStates(),originalActiveRegion.isActive(), getToolkit().getGenomeLocParser(), originalActiveRegion.getExtension()); final ActiveRegion assemblyActiveRegion = new ActiveRegion(originalActiveRegion.getLocation(), originalActiveRegion.getSupportingStates(),originalActiveRegion.isActive(), getToolkit().getGenomeLocParser(), originalActiveRegion.getExtension());
for (GATKSAMRecord rec : originalActiveRegion.getReads()) { for (final GATKSAMRecord rec : originalActiveRegion.getReads()) {
if (rec.getMappingQuality() >= MQthreshold ) { if (rec.getMappingQuality() >= MQthreshold ) {
assemblyActiveRegion.add(rec); assemblyActiveRegion.add(rec);
} }
@ -558,18 +612,11 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
// Stop the trimming madness!!! // Stop the trimming madness!!!
if (!trimmingResult.isVariationPresent()) if (!trimmingResult.isVariationPresent())
return referenceModelForNoVariation(originalActiveRegion,false); return referenceModelForNoVariation(originalActiveRegion,false);
logReadInfo(DEBUG_READ_NAME, trimmingResult.getCallableRegion().getReads(), "Present in trimming result"); logReadInfo(DEBUG_READ_NAME, trimmingResult.getCallableRegion().getReads(), "Present in trimming result");
final AssemblyResultSet assemblyResult = final AssemblyResultSet assemblyResult =
trimmingResult.needsTrimming() ? untrimmedAssemblyResult.trimTo(trimmingResult.getCallableRegion()) : untrimmedAssemblyResult; trimmingResult.needsTrimming() ? untrimmedAssemblyResult.trimTo(trimmingResult.getCallableRegion()) : untrimmedAssemblyResult;
// final AssemblyResultSet assemblyResult = untrimmedAssemblyResult;
// after talking to Ryan -- they grab the reads out of the assembly (and trim then) to pass into the PairHMM
// because at one point they were trying error correcting of the reads based on the haplotypes.. but that is not
// working out, so it's safe for us just to take the reads
//
final ActiveRegion regionForGenotyping = assemblyResult.getRegionForGenotyping(); final ActiveRegion regionForGenotyping = assemblyResult.getRegionForGenotyping();
logReadInfo(DEBUG_READ_NAME, regionForGenotyping.getReads(), "Present in region for genotyping"); logReadInfo(DEBUG_READ_NAME, regionForGenotyping.getReads(), "Present in region for genotyping");
@ -577,10 +624,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
haplotypeBAMWriter.addDroppedReadsFromDelta(DroppedReadsTracker.Reason.TRIMMMED, originalActiveRegion.getReads(), regionForGenotyping.getReads()); haplotypeBAMWriter.addDroppedReadsFromDelta(DroppedReadsTracker.Reason.TRIMMMED, originalActiveRegion.getReads(), regionForGenotyping.getReads());
} }
//
// final ActiveRegion regionForGenotyping = trimmingResult.getCallableRegion();
// final ActiveRegion regionForGenotyping = originalActiveRegion;
// filter out reads from genotyping which fail mapping quality based criteria // filter out reads from genotyping which fail mapping quality based criteria
//TODO - why don't do this before any assembly is done? Why not just once at the beginning of this method //TODO - why don't do this before any assembly is done? Why not just once at the beginning of this method
@ -594,7 +637,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample(filteredReads); final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample(filteredReads);
logReadInfo(DEBUG_READ_NAME, regionForGenotyping.getReads(), "Present in region for genotyping after filtering reads"); logReadInfo(DEBUG_READ_NAME, regionForGenotyping.getReads(), "Present in region for genotyping after filtering reads");
// abort early if something is out of the acceptable range // abort early if something is out of the acceptable range
@ -612,17 +654,17 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
final List<Haplotype> haplotypes = assemblyResult.getHaplotypeList(); final List<Haplotype> haplotypes = assemblyResult.getHaplotypeList();
final Map<String,List<GATKSAMRecord>> reads = splitReadsBySample( regionForGenotyping.getReads() ); final Map<String,List<GATKSAMRecord>> reads = splitReadsBySample( regionForGenotyping.getReads() );
for (List<GATKSAMRecord> rec : reads.values()) { for (final List<GATKSAMRecord> rec : reads.values()) {
logReadInfo(DEBUG_READ_NAME, rec, "Present after splitting assemblyResult by sample"); logReadInfo(DEBUG_READ_NAME, rec, "Present after splitting assemblyResult by sample");
} }
final HashMap<String, Integer> ARreads_origNormalMQ = new HashMap<>(); final HashMap<String, Integer> ARreads_origNormalMQ = new HashMap<>();
for (GATKSAMRecord read : regionForGenotyping.getReads()) { for (final GATKSAMRecord read : regionForGenotyping.getReads()) {
ARreads_origNormalMQ.put(read.getReadName(), read.getMappingQuality()); ARreads_origNormalMQ.put(read.getReadName(), read.getMappingQuality());
} }
// modify MAPQ scores in normal to be high so that we don't do any base quality score capping // modify MAPQ scores in normal to be high so that we don't do any base quality score capping
for(GATKSAMRecord rec : regionForGenotyping.getReads()) { for(final GATKSAMRecord rec : regionForGenotyping.getReads()) {
if (isReadFromNormal(rec)) { if (isReadFromNormal(rec)) {
rec.setMappingQuality(60); rec.setMappingQuality(60);
} }
@ -647,8 +689,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
readLikelihoods.changeReads(readRealignments); readLikelihoods.changeReads(readRealignments);
for (final GATKSAMRecord rec : readRealignments.keySet()) {
for (GATKSAMRecord rec : readRealignments.keySet()) {
logReadInfo(DEBUG_READ_NAME, rec, "Present after computing read likelihoods"); logReadInfo(DEBUG_READ_NAME, rec, "Present after computing read likelihoods");
} }
@ -666,15 +707,8 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
assemblyResult.getFullReferenceWithPadding(), assemblyResult.getFullReferenceWithPadding(),
assemblyResult.getPaddedReferenceLoc(), assemblyResult.getPaddedReferenceLoc(),
regionForGenotyping.getLocation(), regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(),
metaDataTracker, metaDataTracker,
givenAlleles, false , givenAlleles);
tumorSampleName,
normalSampleName,
dbsnp.dbsnp,
cosmicRod,
DEBUG_READ_NAME
);
if ( MTAC.bamWriter != null ) { if ( MTAC.bamWriter != null ) {
final Set<Haplotype> calledHaplotypeSet = new HashSet<>(calledHaplotypes.getCalledHaplotypes()); final Set<Haplotype> calledHaplotypeSet = new HashSet<>(calledHaplotypes.getCalledHaplotypes());
@ -695,16 +729,16 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
if( MTAC.DEBUG ) { logger.info("----------------------------------------------------------------------------------"); } if( MTAC.DEBUG ) { logger.info("----------------------------------------------------------------------------------"); }
List<VariantContext> annotatedCalls = new ArrayList<>(); final List<VariantContext> annotatedCalls = new ArrayList<>();
int eventCount = calledHaplotypes.getCalls().size(); final int eventCount = calledHaplotypes.getCalls().size();
Integer minEventDistance = null; Integer minEventDistance = null;
Integer maxEventDistance = null; Integer maxEventDistance = null;
Integer lastPosition = null; Integer lastPosition = null;
for (VariantContext vc : calledHaplotypes.getCalls()) { for (final VariantContext vc : calledHaplotypes.getCalls()) {
if (lastPosition == null) { if (lastPosition == null) {
lastPosition = vc.getStart(); lastPosition = vc.getStart();
} else { } else {
int dist = Math.abs(vc.getStart() - lastPosition); final int dist = Math.abs(vc.getStart() - lastPosition);
if (maxEventDistance == null || dist > maxEventDistance) { if (maxEventDistance == null || dist > maxEventDistance) {
maxEventDistance = dist; maxEventDistance = dist;
} }
@ -713,23 +747,23 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
} }
} }
Map<String, Object> eventDistanceAttributes = new HashMap<>(); final Map<String, Object> eventDistanceAttributes = new HashMap<>();
eventDistanceAttributes.put(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, eventCount); eventDistanceAttributes.put(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, eventCount);
eventDistanceAttributes.put(GATKVCFConstants.EVENT_DISTANCE_MIN_KEY, minEventDistance); eventDistanceAttributes.put(GATKVCFConstants.EVENT_DISTANCE_MIN_KEY, minEventDistance);
eventDistanceAttributes.put(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY, maxEventDistance); eventDistanceAttributes.put(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY, maxEventDistance);
// can we do this with the Annotation classes instead? // can we do this with the Annotation classes instead?
for (VariantContext originalVC : calledHaplotypes.getCalls()) { for (final VariantContext originalVC : calledHaplotypes.getCalls()) {
VariantContextBuilder vcb = new VariantContextBuilder(originalVC); final VariantContextBuilder vcb = new VariantContextBuilder(originalVC);
Map<String, Object> attributes = new HashMap<>(originalVC.getAttributes()); final Map<String, Object> attributes = new HashMap<>(originalVC.getAttributes());
attributes.putAll(eventDistanceAttributes); attributes.putAll(eventDistanceAttributes);
vcb.attributes(attributes); vcb.attributes(attributes);
Set<String> filters = new HashSet<>(originalVC.getFilters()); final Set<String> filters = new HashSet<>(originalVC.getFilters());
double tumorLod = originalVC.getAttributeAsDouble(GATKVCFConstants.TUMOR_LOD_KEY, -1); final double tumorLod = originalVC.getAttributeAsDouble(GATKVCFConstants.TUMOR_LOD_KEY, -1);
if (tumorLod < MTAC.TUMOR_LOD_THRESHOLD) { if (tumorLod < MTAC.TUMOR_LOD_THRESHOLD) {
filters.add(GATKVCFConstants.TUMOR_LOD_FILTER_NAME); filters.add(GATKVCFConstants.TUMOR_LOD_FILTER_NAME);
} }
@ -739,22 +773,12 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
filters.addAll(calculateFilters(metaDataTracker, originalVC, eventDistanceAttributes)); filters.addAll(calculateFilters(metaDataTracker, originalVC, eventDistanceAttributes));
} }
if (filters.size() > 0) { vcb.filters(filters.isEmpty() ? VariantContext.PASSES_FILTERS : filters);
vcb.filters(filters);
} else {
vcb.passFilters();
}
if (printTCGAsampleHeader) { if (printTCGAsampleHeader) {
GenotypesContext genotypesWithBamSampleNames = originalVC.getGenotypes(); final Genotype tumorGenotype = new GenotypeBuilder(originalVC.getGenotype(tumorSampleName)).name("TUMOR").make();
List<Genotype> renamedGenotypes = new ArrayList<>(); final Genotype normalGenotype = new GenotypeBuilder(originalVC.getGenotype(normalSampleName)).name("NORMAL").make();
GenotypeBuilder GTbuilder = new GenotypeBuilder(genotypesWithBamSampleNames.get(tumorSampleName)); vcb.genotypes(Arrays.asList(tumorGenotype, normalGenotype));
GTbuilder.name("TUMOR");
renamedGenotypes.add(GTbuilder.make());
GTbuilder = new GenotypeBuilder(genotypesWithBamSampleNames.get(normalSampleName));
GTbuilder.name("NORMAL");
renamedGenotypes.add(GTbuilder.make());
vcb.genotypes(renamedGenotypes);
} }
annotatedCalls.add(vcb.make()); annotatedCalls.add(vcb.make());
@ -765,15 +789,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
return annotatedCalls; return annotatedCalls;
} }
private Set<String> calculateFilters(RefMetaDataTracker metaDataTracker, VariantContext vc, Map<String, Object> eventDistanceAttributes) { private Set<String> calculateFilters(final RefMetaDataTracker metaDataTracker, final VariantContext vc, final Map<String, Object> eventDistanceAttributes) {
Set<String> filters = new HashSet<>(); final Set<String> filters = new HashSet<>();
Integer eventCount = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY); final Integer eventCount = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY);
Integer maxEventDistance = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY); final Integer maxEventDistance = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY);
Collection<VariantContext> panelOfNormalsVC = metaDataTracker.getValues(normalPanelRod, final Collection<VariantContext> panelOfNormalsVC = metaDataTracker.getValues(MTAC.normalPanelRod,
getToolkit().getGenomeLocParser().createGenomeLoc(vc.getChr(), vc.getStart())); getToolkit().getGenomeLocParser().createGenomeLoc(vc.getChr(), vc.getStart()));
VariantContext ponVc = panelOfNormalsVC.isEmpty()?null:panelOfNormalsVC.iterator().next(); final VariantContext ponVc = panelOfNormalsVC.isEmpty()?null:panelOfNormalsVC.iterator().next();
if (ponVc != null) { if (ponVc != null) {
filters.add(GATKVCFConstants.PON_FILTER_NAME); filters.add(GATKVCFConstants.PON_FILTER_NAME);
@ -788,13 +812,13 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
double normalF = 0; double normalF = 0;
int normalAltQualityScoreSum = 0; int normalAltQualityScoreSum = 0;
if (hasNormal()) { if (hasNormal()) {
Genotype normalGenotype = vc.getGenotype(normalSampleName); final Genotype normalGenotype = vc.getGenotype(normalSampleName);
// NOTE: how do we get the non-ref depth here? // NOTE: how do we get the non-ref depth here?
normalAltCounts = normalGenotype.getAD()[1]; normalAltCounts = normalGenotype.getAD()[1];
normalF = (Double) normalGenotype.getExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY); normalF = (Double) normalGenotype.getExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY);
Object qss = normalGenotype.getExtendedAttribute(GATKVCFConstants.QUALITY_SCORE_SUM_KEY); final Object qss = normalGenotype.getExtendedAttribute(GATKVCFConstants.QUALITY_SCORE_SUM_KEY);
if (qss != null) { if (qss != null) {
normalAltQualityScoreSum = (Integer) ((Object[]) qss)[1]; normalAltQualityScoreSum = (Integer) ((Object[]) qss)[1];
} else { } else {
@ -819,11 +843,11 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
// such as ACTACTACT -> ACTACT, are overwhelmingly false positives so we // such as ACTACTACT -> ACTACT, are overwhelmingly false positives so we
// hard filter them out by default // hard filter them out by default
if (vc.isIndel()) { if (vc.isIndel()) {
ArrayList rpa = (ArrayList) vc.getAttribute(GATKVCFConstants.REPEATS_PER_ALLELE_KEY); final ArrayList rpa = (ArrayList) vc.getAttribute(GATKVCFConstants.REPEATS_PER_ALLELE_KEY);
String ru = vc.getAttributeAsString(GATKVCFConstants.REPEAT_UNIT_KEY, ""); final String ru = vc.getAttributeAsString(GATKVCFConstants.REPEAT_UNIT_KEY, "");
if (rpa != null && rpa.size() > 1 && ru.length() > 1) { if (rpa != null && rpa.size() > 1 && ru.length() > 1) {
int refCount = (Integer) rpa.get(0); final int refCount = (Integer) rpa.get(0);
int altCount = (Integer) rpa.get(1); final int altCount = (Integer) rpa.get(1);
if (refCount - altCount == 1) { if (refCount - altCount == 1) {
filters.add(GATKVCFConstants.STR_CONTRACTION_FILTER_NAME); filters.add(GATKVCFConstants.STR_CONTRACTION_FILTER_NAME);
@ -840,10 +864,10 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
// clustered read position filter // clustered read position filter
if (MTAC.ENABLE_CLUSTERED_READ_POSITION_FILTER){ if (MTAC.ENABLE_CLUSTERED_READ_POSITION_FILTER){
Double tumorFwdPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_LEFT_OFFSET_KEY); final Double tumorFwdPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_LEFT_OFFSET_KEY);
Double tumorRevPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_RIGHT_OFFSET_KEY); final Double tumorRevPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_RIGHT_OFFSET_KEY);
Double tumorFwdPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_LEFT_OFFSET_KEY); final Double tumorFwdPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_LEFT_OFFSET_KEY);
Double tumorRevPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_RIGHT_OFFSET_KEY); final Double tumorRevPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_RIGHT_OFFSET_KEY);
//If the variant is near the read end (median threshold) and the positions are very similar (MAD threshold) then filter //If the variant is near the read end (median threshold) and the positions are very similar (MAD threshold) then filter
if ( (tumorFwdPosMedian != null && tumorFwdPosMedian <= MTAC.PIR_MEDIAN_THRESHOLD && tumorFwdPosMAD != null && tumorFwdPosMAD <= MTAC.PIR_MAD_THRESHOLD) || if ( (tumorFwdPosMedian != null && tumorFwdPosMedian <= MTAC.PIR_MEDIAN_THRESHOLD && tumorFwdPosMAD != null && tumorFwdPosMAD <= MTAC.PIR_MAD_THRESHOLD) ||
(tumorRevPosMedian != null && tumorRevPosMedian <= MTAC.PIR_MEDIAN_THRESHOLD && tumorRevPosMAD != null && tumorRevPosMAD <= MTAC.PIR_MAD_THRESHOLD)) (tumorRevPosMedian != null && tumorRevPosMedian <= MTAC.PIR_MEDIAN_THRESHOLD && tumorRevPosMAD != null && tumorRevPosMAD <= MTAC.PIR_MAD_THRESHOLD))
@ -866,14 +890,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
*/ */
protected double[] calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final double f) { protected double[] calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final double f) {
final double[] genotypeLikelihoods = new double[2]; final double[] genotypeLikelihoods = new double[2];
int AA = 0, AB=1; final int AA = 0;
final int AB=1;
for( final PileupElement p : pileup ) { for( final PileupElement p : pileup ) {
final byte qual = (p.isDeletion() ? REF_MODEL_DELETION_QUAL : p.getQual()); final byte qual = (p.isDeletion() ? REF_MODEL_DELETION_QUAL : p.getQual());
if( p.isDeletion() || qual > minBaseQual ) { if( p.isDeletion() || qual > minBaseQual ) {
// TODO: why not use base qualities here? // TODO: why not use base qualities here?
//double pobs = QualityUtils.qualToErrorProbLog10(qual); //double pobs = QualityUtils.qualToErrorProbLog10(qual);
double pobs = 1.0d - pow(10, (30 / -10.0)); final double pobs = 1.0d - pow(10, (30 / -10.0));
if( isNonRef(refBase, p)) { if( isNonRef(refBase, p)) {
genotypeLikelihoods[AB] += Math.log10(f*pobs + (1-f)*pobs/3.0d); genotypeLikelihoods[AB] += Math.log10(f*pobs + (1-f)*pobs/3.0d);
genotypeLikelihoods[AA] += Math.log10((1-pobs)/3); genotypeLikelihoods[AA] += Math.log10((1-pobs)/3);
@ -905,7 +930,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
protected double[] calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual) { protected double[] calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual) {
double f = calculateF(pileup, refBase, minBaseQual); final double f = calculateF(pileup, refBase, minBaseQual);
return calcGenotypeLikelihoodsOfRefVsAny(pileup, refBase, minBaseQual, f); return calcGenotypeLikelihoodsOfRefVsAny(pileup, refBase, minBaseQual, f);
} }
@ -923,11 +948,10 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
} }
} }
double f = (double) altCount / ((double) refCount + (double) altCount); return (double) altCount / (refCount + altCount);
return f;
} }
private boolean isNonRef(byte refBase, PileupElement p) { private boolean isNonRef(final byte refBase, final PileupElement p) {
return p.getBase() != refBase || p.isDeletion() || p.isBeforeDeletionStart() || p.isAfterDeletionEnd() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip(); return p.getBase() != refBase || p.isDeletion() || p.isBeforeDeletionStart() || p.isAfterDeletionEnd() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip();
} }
@ -960,8 +984,8 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
return readsToRemove; return readsToRemove;
} }
private static GATKSAMRecord findReadByName(Collection<GATKSAMRecord> reads, String name) { private static GATKSAMRecord findReadByName(final Collection<GATKSAMRecord> reads, final String name) {
for(GATKSAMRecord read : reads) { for(final GATKSAMRecord read : reads) {
if (name.equals(read.getReadName())) return read; if (name.equals(read.getReadName())) return read;
} }
return null; return null;
@ -1022,7 +1046,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
@Override @Override
public Integer reduce(List<VariantContext> callsInRegion, Integer numCalledRegions) { public Integer reduce(final List<VariantContext> callsInRegion, final Integer numCalledRegions) {
for( final VariantContext call : callsInRegion ) { for( final VariantContext call : callsInRegion ) {
vcfWriter.add( call ); vcfWriter.add( call );
} }
@ -1030,7 +1054,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
@Override @Override
public void onTraversalDone(Integer result) { public void onTraversalDone(final Integer result) {
// if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it // if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it
// referenceConfidenceModel.close(); // referenceConfidenceModel.close();
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed //TODO remove the need to call close here for debugging, the likelihood output stream should be managed
@ -1045,114 +1069,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); } public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
public boolean alwaysAppendDbsnpId() { return false; } public boolean alwaysAppendDbsnpId() { return false; }
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP overlap is only used to require more evidence of absence in the normal if the variant in question has been seen before in germline.
*/
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
/**
* If a call overlaps with a record from the provided comp track, the INFO field will be annotated
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
* Records that are filtered in the comp track will be ignored.
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
*/
@Advanced
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
/**
* Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
*/
@Advanced
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
// protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"ClippingRankSumTest", "DepthPerSampleHC"}));
// protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"DepthPerAlleleBySample", "BaseQualitySumPerAlleleBy ruSample", "TandemRepeatAnnotator",
// "RMSMappingQuality","MappingQualityRankSumTest","FisherStrand","StrandOddsRatio","ReadPosRankSumTest","QualByDepth", "Coverage"}));
protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"DepthPerAlleleBySample", "BaseQualitySumPerAlleleBySample", "TandemRepeatAnnotator", "OxoGReadCounts"}));
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Advanced
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<>(Arrays.asList(new String[]{"SpanningDeletions"}));
/**
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
*/
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
//protected String[] annotationGroupsToUse = { StandardAnnotation.class.getSimpleName() };
protected String[] annotationClassesToUse = { };
/**
* A raw, unfiltered, highly sensitive callset in VCF format.
*/
@Output(doc="File to which variants should be written")
protected VariantContextWriter vcfWriter = null;
/**
* Active region trimmer reference.
*/
@ArgumentCollection
protected ActiveRegionTrimmer trimmer = new ActiveRegionTrimmer();
@Hidden
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
protected String keepRG = null;
/**
* The minimum confidence needed for a given base for it to be used in variant calling.
*/
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public byte MIN_BASE_QUALTY_SCORE = 10;
// PAIR-HMM-Related Goodness
// public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.CONSERVATIVE;
// public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.AGGRESSIVE;
public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.HOSTILE;
// Parameters to control read error correction
@Hidden
@Argument(fullName="errorCorrectReads", shortName="errorCorrectReads", doc = "Use an exploratory algorithm to error correct the kmers used during assembly. May cause fundamental problems with the assembly graph itself", required=false)
protected boolean errorCorrectReads = false;
@Hidden
@Argument(fullName="captureAssemblyFailureBAM", shortName="captureAssemblyFailureBAM", doc="If specified, we will write a BAM called assemblyFailure.bam capturing all of the reads that were in the active region when the assembler failed for any reason", required = false)
protected boolean captureAssemblyFailureBAM = false;
@Advanced
@Argument(fullName="dontUseSoftClippedBases", shortName="dontUseSoftClippedBases", doc="If specified, we will not analyze soft clipped bases in the reads", required = false)
protected boolean dontUseSoftClippedBases = false;
@Hidden
@Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false)
protected boolean justDetermineActiveRegions = false;
// reference base padding size
private static final int REFERENCE_PADDING = 500;
private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6;
private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument
private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument
/** /**
* High-level function that runs the assembler on the active region reads, * High-level function that runs the assembler on the active region reads,
* returning a data structure with the resulting information needed * returning a data structure with the resulting information needed
@ -1224,14 +1140,11 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
if( !clippedRead.isEmpty() && clippedRead.getCigar().getReadLength() > 0 ) { if( !clippedRead.isEmpty() && clippedRead.getCigar().getReadLength() > 0 ) {
clippedRead = ReadClipper.hardClipToRegion(clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop()); clippedRead = ReadClipper.hardClipToRegion(clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop());
if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) { if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) {
//logger.info("Keeping read " + clippedRead + " start " + clippedRead.getAlignmentStart() + " end " + clippedRead.getAlignmentEnd());
readsToUse.add(clippedRead); readsToUse.add(clippedRead);
} }
} }
} }
// TODO -- Performance optimization: we partition the reads by sample 4 times right now; let's unify that code.
final List<GATKSAMRecord> downsampledReads = DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart); final List<GATKSAMRecord> downsampledReads = DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart);
if ( MTAC.bamWriter != null && MTAC.emitDroppedReads ) { if ( MTAC.bamWriter != null && MTAC.emitDroppedReads ) {
@ -1268,9 +1181,9 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
* *
* @param reads the list of reads to consider * @param reads the list of reads to consider
*/ */
private void cleanOverlappingReadPairs(final List<GATKSAMRecord> reads, Set<String> normalSampleNames) { private void cleanOverlappingReadPairs(final List<GATKSAMRecord> reads, final Set<String> normalSampleNames) {
Map<String, List<GATKSAMRecord>> data = splitReadsBySample(reads); final Map<String, List<GATKSAMRecord>> data = splitReadsBySample(reads);
for ( String sampleName : data.keySet() ) { for ( final String sampleName : data.keySet() ) {
final boolean isTumor = !normalSampleNames.contains(sampleName); final boolean isTumor = !normalSampleNames.contains(sampleName);
final List<GATKSAMRecord> perSampleReadList = data.get(sampleName); final List<GATKSAMRecord> perSampleReadList = data.get(sampleName);
@ -1284,16 +1197,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
} }
} }
public static void logReadInfo(String readName, Collection<GATKSAMRecord> records, String message) { public static void logReadInfo(final String readName, final Collection<GATKSAMRecord> records, final String message) {
if (readName != null) { if (readName != null) {
for (GATKSAMRecord rec : records) { for (final GATKSAMRecord rec : records) {
logReadInfo(readName, rec, message); logReadInfo(readName, rec, message);
} }
} }
} }
public static void logReadInfo(String readName, GATKSAMRecord rec, String message) { public static void logReadInfo(final String readName, final GATKSAMRecord rec, final String message) {
if (readName != null && rec != null && readName.equals(rec.getReadName())) { if (readName != null && rec != null && readName.equals(rec.getReadName())) {
logger.info("Found " + rec.toString() + " - " + message); logger.info("Found " + rec.toString() + " - " + message);
} }
@ -1322,7 +1234,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
return result; return result;
} }
private boolean isReadFromNormal(GATKSAMRecord rec) { private boolean isReadFromNormal(final GATKSAMRecord rec) {
return normalSampleName != null && normalSampleName.equals(rec.getReadGroup().getSample()); return normalSampleName != null && normalSampleName.equals(rec.getReadGroup().getSample());
} }
@ -1338,7 +1250,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
@Advanced @Advanced
@Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="Disable physical phasing", required = false) @Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="Disable physical phasing", required = false)
protected boolean doNotRunPhysicalPhasing = false; protected boolean doNotRunPhysicalPhasing = false;
} }

View File

@ -54,9 +54,11 @@ package org.broadinstitute.gatk.tools.walkers.cancer.m2;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import htsjdk.samtools.util.StringUtil; import htsjdk.samtools.util.StringUtil;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import org.apache.commons.collections.ListUtils;
import org.apache.commons.lang.mutable.MutableDouble; import org.apache.commons.lang.mutable.MutableDouble;
import org.apache.commons.lang.mutable.MutableInt; import org.apache.commons.lang.mutable.MutableInt;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCallerGenotypingEngine; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCallerGenotypingEngine;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
@ -78,16 +80,33 @@ import java.util.*;
public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine { public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
protected final M2ArgumentCollection MTAC; private final M2ArgumentCollection MTAC;
private final TumorPowerCalculator strandArtifactPowerCalculator; private final TumorPowerCalculator strandArtifactPowerCalculator;
final boolean REF_AND_ALT = false;
final boolean ALT_ONLY = true; private final String tumorSampleName;
private final String matchedNormalSampleName;
private final String DEBUG_READ_NAME;
// {@link GenotypingEngine} requires a non-null {@link AFCalculatorProvider} but this class doesn't need it. Thus we make a dummy
private static AFCalculatorProvider DUMMY_AF_CALCULATOR_PROVIDER = new AFCalculatorProvider() {
public AFCalculator getInstance(final int ploidy, final int maximumAltAlleles) { return null; }
};
private final static Logger logger = Logger.getLogger(SomaticGenotypingEngine.class); private final static Logger logger = Logger.getLogger(SomaticGenotypingEngine.class);
public SomaticGenotypingEngine(final M2ArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider, final boolean doPhysicalPhasing, final M2ArgumentCollection MTAC) { public SomaticGenotypingEngine(final M2ArgumentCollection configuration,
super(configuration, samples, genomeLocParser, afCalculatorProvider, doPhysicalPhasing); final SampleList samples,
final GenomeLocParser genomeLocParser,
final boolean doPhysicalPhasing,
final M2ArgumentCollection MTAC,
final String tumorSampleName,
final String matchedNormalSampleName,
final String DEBUG_READ_NAME) {
super(configuration, samples, genomeLocParser, DUMMY_AF_CALCULATOR_PROVIDER, doPhysicalPhasing);
this.MTAC = MTAC; this.MTAC = MTAC;
this.tumorSampleName = tumorSampleName;
this.matchedNormalSampleName = matchedNormalSampleName;
this.DEBUG_READ_NAME = DEBUG_READ_NAME;
// coverage related initialization // coverage related initialization
final double errorProbability = Math.pow(10, -(MTAC.POWER_CONSTANT_QSCORE/10)); final double errorProbability = Math.pow(10, -(MTAC.POWER_CONSTANT_QSCORE/10));
strandArtifactPowerCalculator = new TumorPowerCalculator(errorProbability, MTAC.STRAND_ARTIFACT_LOD_THRESHOLD, 0.0f); strandArtifactPowerCalculator = new TumorPowerCalculator(errorProbability, MTAC.STRAND_ARTIFACT_LOD_THRESHOLD, 0.0f);
@ -105,37 +124,22 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
* @param ref Reference bytes at active region * @param ref Reference bytes at active region
* @param refLoc Corresponding active region genome location * @param refLoc Corresponding active region genome location
* @param activeRegionWindow Active window * @param activeRegionWindow Active window
* @param genomeLocParser GenomeLocParser
* @param activeAllelesToGenotype Alleles to genotype * @param activeAllelesToGenotype Alleles to genotype
* @param emitReferenceConfidence whether we should add a &lt;NON_REF&gt; alternative allele to the result variation contexts.
* *
* @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes * @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
* *
*/ */
// @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
@Ensures("result != null")
// TODO - can this be refactored? this is hard to follow! // TODO - can this be refactored? this is hard to follow!
public CalledHaplotypes callMutations ( public CalledHaplotypes callMutations (
final List<Haplotype> haplotypes, final List<Haplotype> haplotypes,
//final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap, final ReadLikelihoods<Haplotype> readLikelihoods,
final ReadLikelihoods<Haplotype> readLikelihoods, final Map<String, Integer> originalNormalReadQualities,
final Map<String, Integer> originalNormalReadQualities, final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList, final byte[] ref,
final byte[] ref, final GenomeLoc refLoc,
final GenomeLoc refLoc, final GenomeLoc activeRegionWindow,
final GenomeLoc activeRegionWindow, final RefMetaDataTracker tracker,
final GenomeLocParser genomeLocParser, final List<VariantContext> activeAllelesToGenotype) {
final RefMetaDataTracker tracker,
final List<VariantContext> activeAllelesToGenotype,
final boolean emitReferenceConfidence,
final String tumorSampleName,
final String matchedNormalSampleName,
final RodBinding<VariantContext> dbsnpRod,
final List<RodBinding<VariantContext>> cosmicRod,
final String DEBUG_READ_NAME
) {
// sanity check input arguments // sanity check input arguments
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes); if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods); if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods);
@ -143,8 +147,6 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
if (refLoc == null || refLoc.size() != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc); if (refLoc == null || refLoc.size() != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
if (activeRegionWindow == null ) throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow); if (activeRegionWindow == null ) throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow);
if (activeAllelesToGenotype == null ) throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype); if (activeAllelesToGenotype == null ) throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype);
if (genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser must be non-null, got "+genomeLocParser);
// Somatic Tumor/Normal Sample Handling // Somatic Tumor/Normal Sample Handling
verifySamplePresence(tumorSampleName, readLikelihoods.samples()); verifySamplePresence(tumorSampleName, readLikelihoods.samples());
@ -159,293 +161,217 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
final List<VariantContext> returnCalls = new ArrayList<>(); final List<VariantContext> returnCalls = new ArrayList<>();
for( final int loc : startPosKeySet ) { for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region if( loc < activeRegionWindow.getStart() || loc > activeRegionWindow.getStop() ) {
final List<VariantContext> eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype); continue;
if( eventsAtThisLoc.isEmpty() ) { continue; }
// Create the event mapping object which maps the original haplotype events to the events present at just this locus
final Map<Event, List<Haplotype>> eventMapper = createEventMapper(loc, eventsAtThisLoc, haplotypes);
// Sanity check the priority list for mistakes
final List<String> priorityList = makePriorityList(eventsAtThisLoc);
// Merge the event to find a common reference representation
VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList,
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; }
if (emitReferenceConfidence)
mergedVC = addNonRefSymbolicAllele(mergedVC);
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
}
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
if( configuration.DEBUG && logger != null ) {
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
}
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
//LDG: do we want to do this before or after pulling out overlapping reads?
if (MTAC.isSampleContaminationPresent())
readAlleleLikelihoods.contaminationDownsampling(MTAC.getSampleContamination());
//if (!mergedVC.isBiallelic()) {
// logger.info("[UNSUPPORTED] Detected non-Biallelic VC" + mergedVC.toString());
// continue;
//}
// TODO: once tests are passing, refactor to use the new data structure (not the deprecated one)
// handle overlapping fragments
// TODO: CONFIRM WITH GSA IF IT IS OK TO REMOVE READS FROM THE PRALM (should be... they do it in filterPoorlyModeledReads!)
PerReadAlleleLikelihoodMap tumorPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(tumorSampleName));
filterPRALMForOverlappingReads(tumorPRALM, mergedVC.getReference(), loc, false);
MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in Tumor PRALM after filtering for overlapping reads");
// extend to multiple samples
// compute tumor LOD for each alternate allele
// TODO: somewhere we have to ensure that the all the alleles in the variant context is in alleleFractions passed to getHetGenotypeLogLikelihoods. getHetGenotypeLogLikelihoods will not check that for you
final PerAlleleCollection<Double> altAlleleFractions = estimateAlleleFraction(mergedVC, tumorPRALM, false);
final PerAlleleCollection<Double> tumorHetGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, tumorPRALM, originalNormalReadQualities, altAlleleFractions);
if( configuration.DEBUG && logger != null ) {
StringBuilder outputSB = new StringBuilder("Calculated allelic fraction at " + loc + " = [");
for (Allele allele : altAlleleFractions.getAltAlleles()){
outputSB.append( allele + ": " + altAlleleFractions.getAlt(allele) + ", ");
}
outputSB.append("]");
logger.info(outputSB.toString());
}
final PerAlleleCollection<Double> tumorLods = PerAlleleCollection.createPerAltAlleleCollection();
for (Allele altAllele : mergedVC.getAlternateAlleles()){
tumorLods.set(altAllele, tumorHetGenotypeLLs.get(altAllele) - tumorHetGenotypeLLs.getRef());
}
if (configuration.DEBUG && logger != null) {
StringBuilder outputSB = new StringBuilder("Tumor LOD at " + loc + " = [");
for (Allele altAllele : tumorLods.getAltAlleles()) {
outputSB.append( altAllele + ": " + tumorLods.getAlt(altAllele) + ", ");
}
outputSB.append("]");
logger.info(outputSB.toString());
}
double INIT_NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
double NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
PerReadAlleleLikelihoodMap normalPRALM = null;
PerAlleleCollection<Double> normalLods = PerAlleleCollection.createPerAltAlleleCollection();
// if normal bam is available, compute normal LOD
if (hasNormal) {
normalPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(matchedNormalSampleName));
filterPRALMForOverlappingReads(normalPRALM, mergedVC.getReference(), loc, true);
MuTect2.logReadInfo(DEBUG_READ_NAME, normalPRALM.getLikelihoodReadMap().keySet(), "Present after in Nomral PRALM filtering for overlapping reads");
GenomeLoc eventGenomeLoc = genomeLocParser.createGenomeLoc(activeRegionWindow.getContig(), loc);
Collection<VariantContext> cosmicVC = tracker.getValues(cosmicRod, eventGenomeLoc);
Collection<VariantContext> dbsnpVC = tracker.getValues(dbsnpRod, eventGenomeLoc);
// remove the effect of cosmic from dbSNP
final boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty());
INIT_NORMAL_LOD_THRESHOLD = MTAC.INITIAL_NORMAL_LOD_THRESHOLD; //only set this if this job has a normal
NORMAL_LOD_THRESHOLD = (germlineAtRisk)?MTAC.NORMAL_DBSNP_LOD_THRESHOLD:MTAC.NORMAL_LOD_THRESHOLD;
// compute normal LOD = LL(X|REF)/LL(X|ALT) where ALT is the diploid HET with AF = 0.5
// note normal LOD is REF over ALT, the reciprocal of the tumor LOD
final PerAlleleCollection<Double> diploidHetAlleleFractions = PerAlleleCollection.createPerRefAndAltAlleleCollection();
for (final Allele allele : mergedVC.getAlternateAlleles()){
diploidHetAlleleFractions.setAlt(allele, 0.5);
}
final PerAlleleCollection<Double> normalGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, normalPRALM, originalNormalReadQualities, diploidHetAlleleFractions);
for (final Allele altAllele : mergedVC.getAlternateAlleles()){
normalLods.setAlt(altAllele, normalGenotypeLLs.getRef() - normalGenotypeLLs.getAlt(altAllele));
}
}
int numPassingAlts = 0;
Set<Allele> allelesThatPassThreshold = new HashSet<>();
Allele alleleWithHighestTumorLOD = null;
// TODO: use lambda
for (Allele altAllele : mergedVC.getAlternateAlleles()) {
final boolean passesTumorLodThreshold = tumorLods.getAlt(altAllele) >= MTAC.INITIAL_TUMOR_LOD_THRESHOLD;
final boolean passesNormalLodThreshold = hasNormal ? normalLods.getAlt(altAllele) >= INIT_NORMAL_LOD_THRESHOLD : true;
if (passesTumorLodThreshold && passesNormalLodThreshold) {
numPassingAlts++;
allelesThatPassThreshold.add(altAllele);
if (alleleWithHighestTumorLOD == null
|| tumorLods.getAlt(altAllele) > tumorLods.getAlt(alleleWithHighestTumorLOD)){
alleleWithHighestTumorLOD = altAllele;
}
}
}
final boolean emitVariant = numPassingAlts > 0;
VariantContext call = null;
if (emitVariant) {
VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC);
// FIXME: can simply get first alternate since above we only deal with Bi-allelic sites...
int haplotypeCount = alleleMapper.get(mergedVC.getAlternateAllele(0)).size();
callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
callVcb.attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorLods.getAlt(alleleWithHighestTumorLOD));
if (hasNormal) {
callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, normalLods.getAlt(alleleWithHighestTumorLOD));
if (normalLods.getAlt(alleleWithHighestTumorLOD) < NORMAL_LOD_THRESHOLD) {
callVcb.filter(GATKVCFConstants.GERMLINE_RISK_FILTER_NAME);
}
}
// M1-style strand artifact filter
// TODO: move code to MuTect2::calculateFilters()
// skip if VC has multiple alleles - it will get filtered later anyway
if (MTAC.ENABLE_STRAND_ARTIFACT_FILTER && numPassingAlts == 1) {
final PerReadAlleleLikelihoodMap forwardPRALM = new PerReadAlleleLikelihoodMap();
final PerReadAlleleLikelihoodMap reversePRALM = new PerReadAlleleLikelihoodMap();
splitPRALMintoForwardAndReverseReads(tumorPRALM, forwardPRALM, reversePRALM);
MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in tumor PRALM after PRALM is split");
MuTect2.logReadInfo(DEBUG_READ_NAME, forwardPRALM.getLikelihoodReadMap().keySet(), "Present in forward PRALM after PRALM is split");
MuTect2.logReadInfo(DEBUG_READ_NAME, reversePRALM.getLikelihoodReadMap().keySet(), "Present in reverse PRALM after PRALM is split");
// TODO: build a new type for probability, likelihood, and log_likelihood. e.g. f_fwd :: probability[], tumorGLs_fwd :: likelihood[]
// TODO: don't want to call getHetGenotypeLogLikelihoods on more than one alternate alelle. May need to overload it to take a scalar f_fwd.
final PerAlleleCollection<Double> alleleFractionsForward = estimateAlleleFraction(mergedVC, forwardPRALM, true);
final PerAlleleCollection<Double> tumorGenotypeLLForward = getHetGenotypeLogLikelihoods(mergedVC, forwardPRALM, originalNormalReadQualities, alleleFractionsForward);
final PerAlleleCollection<Double> alleleFractionsReverse = estimateAlleleFraction(mergedVC, reversePRALM, true);
final PerAlleleCollection<Double> tumorGenotypeLLReverse = getHetGenotypeLogLikelihoods(mergedVC, reversePRALM, originalNormalReadQualities, alleleFractionsReverse);
if( configuration.DEBUG && logger != null ) {
StringBuilder forwardMessage = new StringBuilder("Calculated forward allelic fraction at " + loc + " = [");
StringBuilder reverseMessage = new StringBuilder("Calculated reverse allelic fraction at " + loc + " = [");
for (Allele altAllele : altAlleleFractions.getAltAlleles()){
forwardMessage.append( altAllele + ": " + alleleFractionsForward.getAlt(altAllele) + ", ");
reverseMessage.append( altAllele + ": " + alleleFractionsReverse.getAlt(altAllele) + ", ");
}
forwardMessage.append("]");
reverseMessage.append("]");
logger.info(forwardMessage.toString());
logger.info(reverseMessage.toString());
}
double tumorLod_fwd = tumorGenotypeLLForward.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLForward.getRef();
double tumorLod_rev = tumorGenotypeLLReverse.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLReverse.getRef();
double tumorSBpower_fwd = 0.0;
double tumorSBpower_rev = 0.0;
try {
// Note that we use the observed combined (+ and -) allele fraction for power calculation in either direction
tumorSBpower_fwd = strandArtifactPowerCalculator.cachedPowerCalculation(forwardPRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
tumorSBpower_rev = strandArtifactPowerCalculator.cachedPowerCalculation(reversePRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
}
catch (Throwable t) {
System.err.println("Error processing " + activeRegionWindow.getContig() + ":" + loc);
t.printStackTrace(System.err);
throw new RuntimeException(t);
}
callVcb.attribute(GATKVCFConstants.TLOD_FWD_KEY, tumorLod_fwd);
callVcb.attribute(GATKVCFConstants.TLOD_REV_KEY, tumorLod_rev);
callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_FWD_KEY, tumorSBpower_fwd);
callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_REV_KEY, tumorSBpower_rev);
// TODO: add vcf INFO fields. see callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
if ((tumorSBpower_fwd > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_fwd < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD) ||
(tumorSBpower_rev > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_rev < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD))
callVcb.filter(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
}
// TODO: this probably belongs in M2::calculateFilters()
if (numPassingAlts > 1) {
callVcb.filter(GATKVCFConstants.TRIALLELIC_SITE_FILTER_NAME);
}
// build genotypes TODO: this part needs review and refactor
List<Allele> tumorAlleles = new ArrayList<>();
tumorAlleles.add(mergedVC.getReference());
tumorAlleles.add(alleleWithHighestTumorLOD);
Genotype tumorGenotype = new GenotypeBuilder(tumorSampleName, tumorAlleles)
.attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, altAlleleFractions.getAlt(alleleWithHighestTumorLOD))
.make(); // TODO: add ADs?
List<Genotype> genotypes = new ArrayList<>();
genotypes.add(tumorGenotype);
// We assume that the genotype in the normal is 0/0
// TODO: is normal always homozygous reference?
List<Allele> homRefAllelesforNormalGenotype = new ArrayList<>();
homRefAllelesforNormalGenotype.addAll(Collections.nCopies(2, mergedVC.getReference()));
// if we are calling with a normal, build the genotype for the sample to appear in vcf
int REF = 0, ALT = 1;
if (hasNormal) {
PerAlleleCollection<Integer> normalCounts = getRefAltCount(mergedVC, normalPRALM, false);
final int normalRefAlleleDepth = normalCounts.getRef();
final int normalAltAlleleDepth = normalCounts.getAlt(alleleWithHighestTumorLOD);
final int[] normalAlleleDepths = { normalRefAlleleDepth, normalAltAlleleDepth };
final double normalAlleleFraction = (double) normalAltAlleleDepth / ( normalRefAlleleDepth + normalAltAlleleDepth);
final Genotype normalGenotype = new GenotypeBuilder(matchedNormalSampleName, homRefAllelesforNormalGenotype)
.AD(normalAlleleDepths)
.attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, normalAlleleFraction)
.make();
genotypes.add(normalGenotype);
}
//only use alleles found in the tumor (
call = new VariantContextBuilder(callVcb).alleles(tumorAlleles).genotypes(genotypes).make();
}
// how should we be making use of _perSampleFilteredReadList_?
if( call != null ) {
readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
genomeLocParser, emitReferenceConfidence, alleleMapper, readAlleleLikelihoods, call);
ReferenceContext referenceContext = new ReferenceContext(genomeLocParser, genomeLocParser.createGenomeLoc(mergedVC.getChr(), mergedVC.getStart(), mergedVC.getEnd()), refLoc, ref);
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker, readAlleleLikelihoods, call, false);
if( call.getAlleles().size() != mergedVC.getAlleles().size() )
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
// maintain the set of all called haplotypes
for ( final Allele calledAllele : call.getAlleles() ) {
final List<Haplotype> haplotypeList = alleleMapper.get(calledAllele);
if (haplotypeList == null) continue;
calledHaplotypes.addAll(haplotypeList);
}
returnCalls.add( annotatedCall );
}
} }
final List<VariantContext> eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype);
if( eventsAtThisLoc.isEmpty() ) { continue; }
// Create the event mapping object which maps the original haplotype events to the events present at just this locus
final Map<Event, List<Haplotype>> eventMapper = createEventMapper(loc, eventsAtThisLoc, haplotypes);
final List<String> priorityList = makePriorityList(eventsAtThisLoc);
VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList,
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; }
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
}
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
//LDG: do we want to do this before or after pulling out overlapping reads?
if (MTAC.isSampleContaminationPresent())
readAlleleLikelihoods.contaminationDownsampling(MTAC.getSampleContamination());
final PerReadAlleleLikelihoodMap tumorPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(tumorSampleName));
filterPRALMForOverlappingReads(tumorPRALM, mergedVC.getReference(), loc, false);
MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in Tumor PRALM after filtering for overlapping reads");
// extend to multiple samples
// compute tumor LOD for each alternate allele
// TODO: somewhere we have to ensure that the all the alleles in the variant context is in alleleFractions passed to getHetGenotypeLogLikelihoods. getHetGenotypeLogLikelihoods will not check that for you
final PerAlleleCollection<Double> altAlleleFractions = estimateAlleleFraction(mergedVC, tumorPRALM, false);
final PerAlleleCollection<Double> tumorHetGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, tumorPRALM, originalNormalReadQualities, altAlleleFractions);
final PerAlleleCollection<Double> tumorLods = PerAlleleCollection.createPerAltAlleleCollection();
for (final Allele altAllele : mergedVC.getAlternateAlleles()){
tumorLods.set(altAllele, tumorHetGenotypeLLs.get(altAllele) - tumorHetGenotypeLLs.getRef());
}
double INIT_NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
double NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
PerReadAlleleLikelihoodMap normalPRALM = null;
final PerAlleleCollection<Double> normalLods = PerAlleleCollection.createPerAltAlleleCollection();
// if normal bam is available, compute normal LOD
if (hasNormal) {
normalPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(matchedNormalSampleName));
filterPRALMForOverlappingReads(normalPRALM, mergedVC.getReference(), loc, true);
MuTect2.logReadInfo(DEBUG_READ_NAME, normalPRALM.getLikelihoodReadMap().keySet(), "Present after in Nomral PRALM filtering for overlapping reads");
final GenomeLoc eventGenomeLoc = genomeLocParser.createGenomeLoc(activeRegionWindow.getContig(), loc);
final Collection<VariantContext> cosmicVC = tracker.getValues(MTAC.cosmicRod, eventGenomeLoc);
final Collection<VariantContext> dbsnpVC = tracker.getValues(MTAC.dbsnp.dbsnp, eventGenomeLoc);
// remove the effect of cosmic from dbSNP
final boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty());
INIT_NORMAL_LOD_THRESHOLD = MTAC.INITIAL_NORMAL_LOD_THRESHOLD; //only set this if this job has a normal
NORMAL_LOD_THRESHOLD = (germlineAtRisk)?MTAC.NORMAL_DBSNP_LOD_THRESHOLD:MTAC.NORMAL_LOD_THRESHOLD;
// compute normal LOD = LL(X|REF)/LL(X|ALT) where ALT is the diploid HET with AF = 0.5
// note normal LOD is REF over ALT, the reciprocal of the tumor LOD
final PerAlleleCollection<Double> diploidHetAlleleFractions = PerAlleleCollection.createPerRefAndAltAlleleCollection();
for (final Allele allele : mergedVC.getAlternateAlleles()){
diploidHetAlleleFractions.setAlt(allele, 0.5);
}
final PerAlleleCollection<Double> normalGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, normalPRALM, originalNormalReadQualities, diploidHetAlleleFractions);
for (final Allele altAllele : mergedVC.getAlternateAlleles()){
normalLods.setAlt(altAllele, normalGenotypeLLs.getRef() - normalGenotypeLLs.getAlt(altAllele));
}
}
int numPassingAlts = 0;
final Set<Allele> allelesThatPassThreshold = new HashSet<>();
Allele alleleWithHighestTumorLOD = null;
// TODO: use lambda
for (final Allele altAllele : mergedVC.getAlternateAlleles()) {
final boolean passesTumorLodThreshold = tumorLods.getAlt(altAllele) >= MTAC.INITIAL_TUMOR_LOD_THRESHOLD;
final boolean passesNormalLodThreshold = hasNormal ? normalLods.getAlt(altAllele) >= INIT_NORMAL_LOD_THRESHOLD : true;
if (passesTumorLodThreshold && passesNormalLodThreshold) {
numPassingAlts++;
allelesThatPassThreshold.add(altAllele);
if (alleleWithHighestTumorLOD == null
|| tumorLods.getAlt(altAllele) > tumorLods.getAlt(alleleWithHighestTumorLOD)){
alleleWithHighestTumorLOD = altAllele;
}
}
}
if (numPassingAlts == 0) {
continue;
}
final VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC);
// FIXME: can simply get first alternate since above we only deal with Bi-allelic sites...
final int haplotypeCount = alleleMapper.get(mergedVC.getAlternateAllele(0)).size();
callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
callVcb.attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorLods.getAlt(alleleWithHighestTumorLOD));
if (hasNormal) {
callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, normalLods.getAlt(alleleWithHighestTumorLOD));
if (normalLods.getAlt(alleleWithHighestTumorLOD) < NORMAL_LOD_THRESHOLD) {
callVcb.filter(GATKVCFConstants.GERMLINE_RISK_FILTER_NAME);
}
}
// M1-style strand artifact filter
// TODO: move code to MuTect2::calculateFilters()
// skip if VC has multiple alleles - it will get filtered later anyway
if (MTAC.ENABLE_STRAND_ARTIFACT_FILTER && numPassingAlts == 1) {
final PerReadAlleleLikelihoodMap forwardPRALM = new PerReadAlleleLikelihoodMap();
final PerReadAlleleLikelihoodMap reversePRALM = new PerReadAlleleLikelihoodMap();
splitPRALMintoForwardAndReverseReads(tumorPRALM, forwardPRALM, reversePRALM);
MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in tumor PRALM after PRALM is split");
MuTect2.logReadInfo(DEBUG_READ_NAME, forwardPRALM.getLikelihoodReadMap().keySet(), "Present in forward PRALM after PRALM is split");
MuTect2.logReadInfo(DEBUG_READ_NAME, reversePRALM.getLikelihoodReadMap().keySet(), "Present in reverse PRALM after PRALM is split");
// TODO: build a new type for probability, likelihood, and log_likelihood. e.g. f_fwd :: probability[], tumorGLs_fwd :: likelihood[]
// TODO: don't want to call getHetGenotypeLogLikelihoods on more than one alternate alelle. May need to overload it to take a scalar f_fwd.
final PerAlleleCollection<Double> alleleFractionsForward = estimateAlleleFraction(mergedVC, forwardPRALM, true);
final PerAlleleCollection<Double> tumorGenotypeLLForward = getHetGenotypeLogLikelihoods(mergedVC, forwardPRALM, originalNormalReadQualities, alleleFractionsForward);
final PerAlleleCollection<Double> alleleFractionsReverse = estimateAlleleFraction(mergedVC, reversePRALM, true);
final PerAlleleCollection<Double> tumorGenotypeLLReverse = getHetGenotypeLogLikelihoods(mergedVC, reversePRALM, originalNormalReadQualities, alleleFractionsReverse);
final double tumorLod_fwd = tumorGenotypeLLForward.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLForward.getRef();
final double tumorLod_rev = tumorGenotypeLLReverse.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLReverse.getRef();
// Note that we use the observed combined (+ and -) allele fraction for power calculation in either direction
final double tumorSBpower_fwd = strandArtifactPowerCalculator.cachedPowerCalculation(forwardPRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
final double tumorSBpower_rev = strandArtifactPowerCalculator.cachedPowerCalculation(reversePRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
callVcb.attribute(GATKVCFConstants.TLOD_FWD_KEY, tumorLod_fwd);
callVcb.attribute(GATKVCFConstants.TLOD_REV_KEY, tumorLod_rev);
callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_FWD_KEY, tumorSBpower_fwd);
callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_REV_KEY, tumorSBpower_rev);
// TODO: add vcf INFO fields. see callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
if ((tumorSBpower_fwd > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_fwd < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD) ||
(tumorSBpower_rev > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_rev < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD))
callVcb.filter(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
}
// TODO: this probably belongs in M2::calculateFilters()
if (numPassingAlts > 1) {
callVcb.filter(GATKVCFConstants.TRIALLELIC_SITE_FILTER_NAME);
}
// build genotypes TODO: this part needs review and refactor
final List<Allele> tumorAlleles = Arrays.asList(mergedVC.getReference(), alleleWithHighestTumorLOD);
final Genotype tumorGenotype = new GenotypeBuilder(tumorSampleName, tumorAlleles)
.attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, altAlleleFractions.getAlt(alleleWithHighestTumorLOD))
.make(); // TODO: add ADs?
final List<Genotype> genotypes = new ArrayList<>();
genotypes.add(tumorGenotype);
// We assume that the genotype in the normal is 0/0
// TODO: is normal always homozygous reference?
final List<Allele> homRefAllelesforNormalGenotype = Collections.nCopies(2, mergedVC.getReference());
// if we are calling with a normal, build the genotype for the sample to appear in vcf
if (hasNormal) {
final PerAlleleCollection<Integer> normalCounts = getRefAltCount(mergedVC, normalPRALM, false);
final int normalRefAlleleDepth = normalCounts.getRef();
final int normalAltAlleleDepth = normalCounts.getAlt(alleleWithHighestTumorLOD);
final int[] normalAlleleDepths = { normalRefAlleleDepth, normalAltAlleleDepth };
final double normalAlleleFraction = (double) normalAltAlleleDepth / ( normalRefAlleleDepth + normalAltAlleleDepth);
final Genotype normalGenotype = new GenotypeBuilder(matchedNormalSampleName, homRefAllelesforNormalGenotype)
.AD(normalAlleleDepths)
.attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, normalAlleleFraction)
.make();
genotypes.add(normalGenotype);
}
//only use alleles found in the tumor (
final VariantContext call = new VariantContextBuilder(callVcb).alleles(tumorAlleles).genotypes(genotypes).make();
// how should we be making use of _perSampleFilteredReadList_?
readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
genomeLocParser, false, alleleMapper, readAlleleLikelihoods, call);
final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser, genomeLocParser.createGenomeLoc(mergedVC.getChr(), mergedVC.getStart(), mergedVC.getEnd()), refLoc, ref);
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker, readAlleleLikelihoods, call, false);
if( call.getAlleles().size() != mergedVC.getAlleles().size() )
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
// maintain the set of all called haplotypes
call.getAlleles().stream().map(alleleMapper::get).filter(Objects::nonNull).forEach(calledHaplotypes::addAll);
returnCalls.add( annotatedCall );
} }
// TODO: understand effect of enabling this for somatic calling... // TODO: understand effect of enabling this for somatic calling...
final List<VariantContext> phasedCalls = doPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls; final List<VariantContext> outputCalls = doPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
return new CalledHaplotypes(phasedCalls, calledHaplotypes); return new CalledHaplotypes(outputCalls, calledHaplotypes);
//return new CalledHaplotypes(returnCalls, calledHaplotypes);
} }
private void verifySamplePresence(String sampleName, List<String> samples) { private void verifySamplePresence(final String sampleName, final List<String> samples) {
if (!samples.contains(sampleName)) { if (!samples.contains(sampleName)) {
throw new IllegalArgumentException("Unable to find sample name "+sampleName+"in sample list of " + StringUtil.join(",", samples)); throw new IllegalArgumentException("Unable to find sample name "+sampleName+"in sample list of " + StringUtil.join(",", samples));
} }
@ -459,7 +385,7 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
* @param alleleFractions allele fraction(s) for alternate allele(s) * @param alleleFractions allele fraction(s) for alternate allele(s)
* *
* @return genotype likelihoods for homRef and het for each alternate allele * @return genotype likelihoods for homRef and het for each alternate allele
*/ */
private PerAlleleCollection<Double> getHetGenotypeLogLikelihoods(final VariantContext mergedVC, private PerAlleleCollection<Double> getHetGenotypeLogLikelihoods(final VariantContext mergedVC,
final PerReadAlleleLikelihoodMap tumorPRALM, final PerReadAlleleLikelihoodMap tumorPRALM,
final Map<String, Integer> originalNormalMQs, final Map<String, Integer> originalNormalMQs,
@ -470,13 +396,11 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
} }
final PerAlleleCollection<MutableDouble> genotypeLogLikelihoods = PerAlleleCollection.createPerRefAndAltAlleleCollection(); final PerAlleleCollection<MutableDouble> genotypeLogLikelihoods = PerAlleleCollection.createPerRefAndAltAlleleCollection();
for (final Allele allele : mergedVC.getAlleles()){ mergedVC.getAlleles().forEach(a -> genotypeLogLikelihoods.set(a, new MutableDouble(0)));
genotypeLogLikelihoods.set(allele, new MutableDouble(0.0));
}
final Allele refAllele = mergedVC.getReference(); final Allele refAllele = mergedVC.getReference();
for(Map.Entry<GATKSAMRecord,Map<Allele, Double>> readAlleleLikelihoodMap : tumorPRALM.getLikelihoodReadMap().entrySet()) { for(Map.Entry<GATKSAMRecord,Map<Allele, Double>> readAlleleLikelihoodMap : tumorPRALM.getLikelihoodReadMap().entrySet()) {
Map<Allele, Double> alleleLikelihoodMap = readAlleleLikelihoodMap.getValue(); final Map<Allele, Double> alleleLikelihoodMap = readAlleleLikelihoodMap.getValue();
if (originalNormalMQs.get(readAlleleLikelihoodMap.getKey().getReadName()) == 0) { if (originalNormalMQs.get(readAlleleLikelihoodMap.getKey().getReadName()) == 0) {
continue; continue;
} }
@ -484,9 +408,9 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
final double readRefLogLikelihood = alleleLikelihoodMap.get(refAllele); final double readRefLogLikelihood = alleleLikelihoodMap.get(refAllele);
genotypeLogLikelihoods.getRef().add(readRefLogLikelihood); genotypeLogLikelihoods.getRef().add(readRefLogLikelihood);
for (Allele altAllele : alleleFractions.getAltAlleles()) { for (final Allele altAllele : alleleFractions.getAltAlleles()) {
double readAltLogLikelihood = alleleLikelihoodMap.get(altAllele); final double readAltLogLikelihood = alleleLikelihoodMap.get(altAllele);
double adjustedReadAltLL = Math.log10( final double adjustedReadAltLL = Math.log10(
Math.pow(10, readRefLogLikelihood) * (1 - alleleFractions.getAlt(altAllele)) + Math.pow(10, readRefLogLikelihood) * (1 - alleleFractions.getAlt(altAllele)) +
Math.pow(10, readAltLogLikelihood) * alleleFractions.getAlt(altAllele) Math.pow(10, readAltLogLikelihood) * alleleFractions.getAlt(altAllele)
); );
@ -515,9 +439,9 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
final PerAlleleCollection<Integer> alleleCounts = getRefAltCount(vc, pralm, oneStrandOnly); final PerAlleleCollection<Integer> alleleCounts = getRefAltCount(vc, pralm, oneStrandOnly);
final PerAlleleCollection<Double> alleleFractions = PerAlleleCollection.createPerAltAlleleCollection(); final PerAlleleCollection<Double> alleleFractions = PerAlleleCollection.createPerAltAlleleCollection();
int refCount = alleleCounts.getRef(); final int refCount = alleleCounts.getRef();
for ( final Allele altAllele : vc.getAlternateAlleles() ) { for ( final Allele altAllele : vc.getAlternateAlleles() ) {
int altCount = alleleCounts.getAlt(altAllele); final int altCount = alleleCounts.getAlt(altAllele);
double alleleFraction = (double) altCount / (refCount + altCount); double alleleFraction = (double) altCount / (refCount + altCount);
// weird case, but I've seen it happen in one strand cases // weird case, but I've seen it happen in one strand cases
if (refCount == 0 && altCount == refCount ) { if (refCount == 0 && altCount == refCount ) {
@ -552,16 +476,12 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
final PerAlleleCollection<MutableInt> alleleCounts = PerAlleleCollection.createPerRefAndAltAlleleCollection(); final PerAlleleCollection<MutableInt> alleleCounts = PerAlleleCollection.createPerRefAndAltAlleleCollection();
vcAlleles.stream().forEach(a -> alleleCounts.set(a, new MutableInt(0)));
// initialize the allele counts to 0
for (final Allele allele : vcAlleles) {
alleleCounts.set(allele, new MutableInt(0));
}
for (final Map.Entry<GATKSAMRecord, Map<Allele, Double>> readAlleleLikelihoodMap : pralm.getLikelihoodReadMap().entrySet()) { for (final Map.Entry<GATKSAMRecord, Map<Allele, Double>> readAlleleLikelihoodMap : pralm.getLikelihoodReadMap().entrySet()) {
final GATKSAMRecord read = readAlleleLikelihoodMap.getKey(); final GATKSAMRecord read = readAlleleLikelihoodMap.getKey();
final Map<Allele, Double> alleleLikelihoodMap = readAlleleLikelihoodMap.getValue(); final Map<Allele, Double> alleleLikelihoodMap = readAlleleLikelihoodMap.getValue();
MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(alleleLikelihoodMap, vcAlleles); final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(alleleLikelihoodMap, vcAlleles);
if (read.getMappingQuality() > 0 && mostLikelyAllele.isInformative()) { if (read.getMappingQuality() > 0 && mostLikelyAllele.isInformative()) {
alleleCounts.get(mostLikelyAllele.getMostLikelyAllele()).increment(); alleleCounts.get(mostLikelyAllele.getMostLikelyAllele()).increment();
@ -581,18 +501,16 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
} }
} }
private void filterPRALMForOverlappingReads(PerReadAlleleLikelihoodMap pralm, Allele ref, int location, boolean retainMismatches) { private void filterPRALMForOverlappingReads(final PerReadAlleleLikelihoodMap pralm, final Allele ref, final int location, final boolean retainMismatches) {
final Map<GATKSAMRecord, Map<Allele, Double>> m = pralm.getLikelihoodReadMap();
Map<GATKSAMRecord, Map<Allele, Double>> m = pralm.getLikelihoodReadMap();
// iterate through the reads, if the name has been seen before we have overlapping (potentially) fragments, so handle them // iterate through the reads, if the name has been seen before we have overlapping (potentially) fragments, so handle them
Map<String, GATKSAMRecord> nameToRead = new HashMap<>(); final Map<String, GATKSAMRecord> nameToRead = new HashMap<>();
Set<GATKSAMRecord> readsToKeep = new HashSet<>(); final Set<GATKSAMRecord> readsToKeep = new HashSet<>();
for(GATKSAMRecord rec : m.keySet()) { for(final GATKSAMRecord rec : m.keySet()) {
// if we haven't seen it... just record the name and add it to the list of reads to keep // if we haven't seen it... just record the name and add it to the list of reads to keep
GATKSAMRecord existing = nameToRead.get(rec.getReadName()); final GATKSAMRecord existing = nameToRead.get(rec.getReadName());
if (existing == null) { if (existing == null) {
nameToRead.put(rec.getReadName(), rec); nameToRead.put(rec.getReadName(), rec);
readsToKeep.add(rec); readsToKeep.add(rec);
@ -604,11 +522,11 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
// TODO: CHECK IF THE READS BOTH OVERLAP THE POSITION!!!! // TODO: CHECK IF THE READS BOTH OVERLAP THE POSITION!!!!
if ( ReadUtils.isInsideRead(existing, location) && ReadUtils.isInsideRead(rec, location) ) { if ( ReadUtils.isInsideRead(existing, location) && ReadUtils.isInsideRead(rec, location) ) {
MostLikelyAllele existingMLA = pralm.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(existing)); final MostLikelyAllele existingMLA = PerReadAlleleLikelihoodMap.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(existing));
Allele existingAllele = existingMLA.getMostLikelyAllele(); final Allele existingAllele = existingMLA.getMostLikelyAllele();
MostLikelyAllele recMLA = pralm.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(rec)); final MostLikelyAllele recMLA = PerReadAlleleLikelihoodMap.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(rec));
Allele recAllele = recMLA.getMostLikelyAllele(); final Allele recAllele = recMLA.getMostLikelyAllele();
// if the reads disagree at this position... // if the reads disagree at this position...
if (!existingAllele.equals(recAllele)) { if (!existingAllele.equals(recAllele)) {
@ -654,7 +572,7 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
} }
private void splitPRALMintoForwardAndReverseReads(final PerReadAlleleLikelihoodMap originalPRALM, final PerReadAlleleLikelihoodMap forwardPRALM, final PerReadAlleleLikelihoodMap reversePRALM) { private void splitPRALMintoForwardAndReverseReads(final PerReadAlleleLikelihoodMap originalPRALM, final PerReadAlleleLikelihoodMap forwardPRALM, final PerReadAlleleLikelihoodMap reversePRALM) {
Map<GATKSAMRecord, Map<Allele, Double>> origReadAlleleLikelihoodMap = originalPRALM.getLikelihoodReadMap(); final Map<GATKSAMRecord, Map<Allele, Double>> origReadAlleleLikelihoodMap = originalPRALM.getLikelihoodReadMap();
for (final GATKSAMRecord read : origReadAlleleLikelihoodMap.keySet()) { for (final GATKSAMRecord read : origReadAlleleLikelihoodMap.keySet()) {
if (read.isStrandless()) if (read.isStrandless())
continue; continue;
@ -669,17 +587,4 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
} }
} }
} }
// Move to utility class so we can use one shared with HaplotypeCallerGenotypingEngine
/**
 * Returns a copy of the given variant context whose allele list additionally contains the
 * symbolic non-reference allele ({@code <NON_REF>}), as used in GVCF-style output.
 *
 * @param mergedVC the variant context to augment; not modified
 * @return a new VariantContext identical to {@code mergedVC} except for the appended NON_REF allele
 */
private VariantContext addNonRefSymbolicAllele(final VariantContext mergedVC) {
    // Fetch the allele list once (the original fetched it twice) and presize the copy
    // for the one extra symbolic allele.
    final List<Allele> originalAlleles = mergedVC.getAlleles();
    final List<Allele> augmentedAlleles = new ArrayList<>(originalAlleles.size() + 1);
    augmentedAlleles.addAll(originalAlleles);
    augmentedAlleles.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    return new VariantContextBuilder(mergedVC).alleles(augmentedAlleles).make();
}
} }

View File

@ -55,6 +55,7 @@ import org.apache.commons.math.MathException;
import org.apache.commons.math.distribution.BinomialDistribution; import org.apache.commons.math.distribution.BinomialDistribution;
import org.apache.commons.math.distribution.BinomialDistributionImpl; import org.apache.commons.math.distribution.BinomialDistributionImpl;
import org.apache.commons.math3.util.Pair; import org.apache.commons.math3.util.Pair;
import org.broadinstitute.gatk.utils.exceptions.GATKException;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@ -70,7 +71,6 @@ public class TumorPowerCalculator {
private final double tumorLODThreshold; private final double tumorLODThreshold;
private final double contamination; private final double contamination;
private final boolean enableSmoothing; private final boolean enableSmoothing;
public static int numCacheHits = 0;
private final HashMap<PowerCacheKey, Double> cache = new HashMap<PowerCacheKey, Double>(); private final HashMap<PowerCacheKey, Double> cache = new HashMap<PowerCacheKey, Double>();
@ -134,16 +134,18 @@ public class TumorPowerCalculator {
* @throws MathException * @throws MathException
* *
*/ */
public double cachedPowerCalculation(final int numReads, final double alleleFraction) throws MathException { public double cachedPowerCalculation(final int numReads, final double alleleFraction) {
PowerCacheKey key = new PowerCacheKey(numReads, alleleFraction); PowerCacheKey key = new PowerCacheKey(numReads, alleleFraction);
// we first look up if power for given number of read and allele fraction has already been computed and stored in the cache. // we first look up if power for given number of read and allele fraction has already been computed and stored in the cache.
// if not we compute it and store it in teh cache. // if not we compute it and store it in teh cache.
Double power = cache.get(key); Double power = cache.get(key);
if (power == null) { if (power == null) {
power = calculatePower(numReads, alleleFraction); try {
power = calculatePower(numReads, alleleFraction);
} catch (final Exception ex) {
throw new GATKException("Power calculation failed", ex);
}
cache.put(key, power); cache.put(key, power);
} else {
numCacheHits++;
} }
return power; return power;
} }