Merge pull request #1453 from broadinstitute/db_mutect2

Lots of small improvements to Mutect2 code
2016-08-23 18:35:38 -04:00 · 2016-08-23 18:35:38 -04:00 · 4aede99697
parent 53e1d42d28 01142dfb1c
commit 4aede99697
4 changed files with 446 additions and 599 deletions
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/M2ArgumentCollection.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/M2ArgumentCollection.java
@ -51,12 +51,41 @@

 package org.broadinstitute.gatk.tools.walkers.cancer.m2;

+import htsjdk.variant.variantcontext.VariantContext;
+import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
 import org.broadinstitute.gatk.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection;
-import org.broadinstitute.gatk.utils.commandline.Advanced;
-import org.broadinstitute.gatk.utils.commandline.Argument;
-import org.broadinstitute.gatk.utils.commandline.Hidden;
+import org.broadinstitute.gatk.utils.commandline.*;
+
+import java.util.Collections;
+import java.util.List;

 public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection {
+
+    /***************************************/
+    // Reference Metadata inputs
+    /***************************************/
+    /**
+     * MuTect2 has the ability to use COSMIC data in conjunction with dbSNP to adjust the threshold for evidence of a variant
+     * in the normal.  If a variant is present in dbSNP, but not in COSMIC, then more evidence is required from the normal
+     * sample to prove the variant is not present in germline.
+     */
+    @Input(fullName="cosmic", shortName = "cosmic", doc="VCF file of COSMIC sites", required=false)
+    public List<RodBinding<VariantContext>> cosmicRod = Collections.emptyList();
+
+    /**
+     * A panel of normals can be a useful (optional) input to help filter out commonly seen sequencing noise that may appear as low allele-fraction somatic variants.
+     */
+    @Input(fullName="normal_panel", shortName = "PON", doc="VCF file of sites observed in normal", required=false)
+    public List<RodBinding<VariantContext>> normalPanelRod = Collections.emptyList();
+
+
+    /**
+     * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
+     * dbSNP overlap is only used to require more evidence of absence in the normal if the variant in question has been seen before in germline.
+     */
+    @ArgumentCollection
+    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
+
    @Advanced
    @Argument(fullName="m2debug", shortName="m2debug", doc="Print out very verbose M2 debug information", required = false)
    public boolean M2_DEBUG = false;
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
@ -201,7 +201,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    // the genotyping engine
    protected HaplotypeCallerGenotypingEngine genotypingEngine = null;

-
    private byte MIN_TAIL_QUALITY;
    private double log10GlobalReadMismappingRate;

@ -214,9 +213,8 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    @ArgumentCollection
    protected LikelihoodEngineArgumentCollection LEAC = new LikelihoodEngineArgumentCollection();

-
    @Argument(fullName = "debug_read_name", required = false, doc="trace this read name through the calling process")
-    public String DEBUG_READ_NAME = null;
+    protected String DEBUG_READ_NAME = null;

    @Hidden
    @Advanced
@ -224,24 +222,81 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    final public int MQthreshold = 20;


-    /***************************************/
-    // Reference Metadata inputs
-    /***************************************/
-    /**
-     * MuTect2 has the ability to use COSMIC data in conjunction with dbSNP to adjust the threshold for evidence of a variant
-     * in the normal.  If a variant is present in dbSNP, but not in COSMIC, then more evidence is required from the normal
-     * sample to prove the variant is not present in germline.
-     */
-    @Input(fullName="cosmic", shortName = "cosmic", doc="VCF file of COSMIC sites", required=false)
-    public List<RodBinding<VariantContext>> cosmicRod = Collections.emptyList();
-
-    /**
-     * A panel of normals can be a useful (optional) input to help filter out commonly seen sequencing noise that may appear as low allele-fraction somatic variants.
-     */
-    @Input(fullName="normal_panel", shortName = "PON", doc="VCF file of sites observed in normal", required=false)
-    public List<RodBinding<VariantContext>> normalPanelRod = Collections.emptyList();
+    public RodBinding<VariantContext> getDbsnpRodBinding() { return MTAC.dbsnp.dbsnp; }

    private HaplotypeBAMWriter haplotypeBAMWriter;
+    /**
+     * If a call overlaps with a record from the provided comp track, the INFO field will be annotated
+     *  as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
+     *  Records that are filtered in the comp track will be ignored.
+     *  Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
+     */
+    @Advanced
+    @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
+    public List<RodBinding<VariantContext>> comps = Collections.emptyList();
+    public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
+
+    /**
+     * Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
+     */
+    @Advanced
+    @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
+    protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"DepthPerAlleleBySample", "BaseQualitySumPerAlleleBySample", "TandemRepeatAnnotator", "OxoGReadCounts"}));
+
+    /**
+     * Which annotations to exclude from output in the VCF file.  Note that this argument has higher priority than the -A or -G arguments,
+     * so annotations will be excluded even if they are explicitly included with the other options.
+     */
+    @Advanced
+    @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
+    protected List<String> annotationsToExclude = new ArrayList<>(Arrays.asList(new String[]{"SpanningDeletions"}));
+
+    /**
+     * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
+     */
+    @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
+    protected String[] annotationClassesToUse = { };
+
+    @Output(doc="File to which variants should be written")
+    protected VariantContextWriter vcfWriter = null;
+
+    /**
+     * Active region trimmer reference.
+     */
+    @ArgumentCollection
+    protected ActiveRegionTrimmer trimmer = new ActiveRegionTrimmer();
+
+    @Hidden
+    @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
+    protected String keepRG = null;
+
+    @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
+    public byte MIN_BASE_QUALTY_SCORE = 10;
+
+    public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.HOSTILE;
+
+    @Hidden
+    @Argument(fullName="errorCorrectReads", shortName="errorCorrectReads", doc = "Use an exploratory algorithm to error correct the kmers used during assembly.  May cause fundamental problems with the assembly graph itself", required=false)
+    protected boolean errorCorrectReads = false;
+
+    @Hidden
+    @Argument(fullName="captureAssemblyFailureBAM", shortName="captureAssemblyFailureBAM", doc="If specified, we will write a BAM called assemblyFailure.bam capturing all of the reads that were in the active region when the assembler failed for any reason", required = false)
+    protected boolean captureAssemblyFailureBAM = false;
+
+    @Advanced
+    @Argument(fullName="dontUseSoftClippedBases", shortName="dontUseSoftClippedBases", doc="If specified, we will not analyze soft clipped bases in the reads", required = false)
+    protected boolean dontUseSoftClippedBases = false;
+
+    @Hidden
+    @Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code.  Useful for benchmarking and scalability testing", required=false)
+    protected boolean justDetermineActiveRegions = false;
+
+    // reference base padding size
+    private static final int REFERENCE_PADDING = 500;
+
+    private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6;
+    private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument
+    private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument

    @Override
    public void initialize() {
@ -250,15 +305,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSAMFileSamples(getToolkit().getSAMFileHeader())));

        // MUTECT: check that we have at least one tumor bam
-        for(SAMReaderID id : getToolkit().getReadsDataSource().getReaderIDs()) {
+        for(final SAMReaderID id : getToolkit().getReadsDataSource().getReaderIDs()) {
            if (id.getTags().getPositionalTags().size() == 0) {
                throw new RuntimeException("BAMs must be tagged as either 'tumor' or 'normal'");
            }

            // only supports single-sample BAMs (ie first read group is representative)
-            String bamSampleName = getToolkit().getReadsDataSource().getHeader(id).getReadGroups().get(0).getSample();
+            final String bamSampleName = getToolkit().getReadsDataSource().getHeader(id).getReadGroups().get(0).getSample();

-            for(String tag : id.getTags().getPositionalTags()) {
+            for(final String tag : id.getTags().getPositionalTags()) {
                if (BAM_TAG_TUMOR.equalsIgnoreCase(tag)) {
                    tumorSAMReaderIDs.add(id);
                    if (tumorSampleName == null) {
@ -330,9 +385,9 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        final MergeVariantsAcrossHaplotypes variantMerger = new MergeVariantsAcrossHaplotypes();

        final GenomeAnalysisEngine toolkit = getToolkit();
-        final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();

-        genotypingEngine = new SomaticGenotypingEngine( MTAC, samplesList, genomeLocParser, FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(), MTAC, logger), !doNotRunPhysicalPhasing, MTAC);
+        genotypingEngine = new SomaticGenotypingEngine( MTAC, samplesList, toolkit.getGenomeLocParser(), !doNotRunPhysicalPhasing, MTAC,
+                tumorSampleName, normalSampleName, DEBUG_READ_NAME);

        genotypingEngine.setCrossHaplotypeEventMerger(variantMerger);
        genotypingEngine.setAnnotationEngine(annotationEngine);
@ -353,7 +408,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        trimmer.snpPadding = 50;

        samplesList = toolkit.getReadSampleList();
-        Set<String> sampleSet = SampleListUtils.asSet(samplesList);
+        final Set<String> sampleSet = SampleListUtils.asSet(samplesList);

        if( MTAC.CONTAMINATION_FRACTION_FILE != null )
            MTAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(MTAC.CONTAMINATION_FRACTION_FILE, MTAC.CONTAMINATION_FRACTION, sampleSet, logger));
@ -367,7 +422,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        }
        final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());

-        Set<VCFHeaderLine> headerInfo = new HashSet<>();
+        final Set<VCFHeaderLine> headerInfo = new HashSet<>();

        // all annotation fields from VariantAnnotatorEngine
        headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
@ -383,7 +438,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        headerInfo.addAll(getM2HeaderLines());
        headerInfo.addAll(getSampleHeaderLines());

-        List<String> outputSampleNames = getOutputSampleNames();
+        final List<String> outputSampleNames = getOutputSampleNames();

        vcfWriter.writeHeader(new VCFHeader(headerInfo, outputSampleNames));

@ -391,7 +446,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    }

    private Set<VCFHeaderLine> getM2HeaderLines(){
-        Set<VCFHeaderLine> headerInfo = new HashSet<>();
+        final Set<VCFHeaderLine> headerInfo = new HashSet<>();
        headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NORMAL_LOD_KEY));
        headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.TUMOR_LOD_KEY));
        headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.PANEL_OF_NORMALS_COUNT_KEY));
@ -430,21 +485,21 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    }

    private Set<VCFHeaderLine> getSampleHeaderLines(){
-        Set<VCFHeaderLine> sampleLines = new HashSet<>();
+        final Set<VCFHeaderLine> sampleLines = new HashSet<>();
        if (printTCGAsampleHeader) {
            //NOTE: This will only list the first bam file for each tumor/normal sample if there is more than one
-            Map<String, String> normalSampleHeaderAttributes = new HashMap<>();
+            final Map<String, String> normalSampleHeaderAttributes = new HashMap<>();
            normalSampleHeaderAttributes.put("ID", "NORMAL");
            normalSampleHeaderAttributes.put("SampleName", normalSampleName);
            if (normalSAMReaderIDs.iterator().hasNext() && !getToolkit().getArguments().disableCommandLineInVCF)
                normalSampleHeaderAttributes.put("File", normalSAMReaderIDs.iterator().next().getSamFilePath());
-            VCFSimpleHeaderLine normalSampleHeader = new VCFSimpleHeaderLine("SAMPLE", normalSampleHeaderAttributes);
-            Map<String, String> tumorSampleHeaderAttributes = new HashMap<>();
+            final VCFSimpleHeaderLine normalSampleHeader = new VCFSimpleHeaderLine("SAMPLE", normalSampleHeaderAttributes);
+            final Map<String, String> tumorSampleHeaderAttributes = new HashMap<>();
            tumorSampleHeaderAttributes.put("ID", "TUMOR");
            tumorSampleHeaderAttributes.put("SampleName", tumorSampleName);
            if (tumorSAMReaderIDs.iterator().hasNext() && !getToolkit().getArguments().disableCommandLineInVCF)
                tumorSampleHeaderAttributes.put("File", tumorSAMReaderIDs.iterator().next().getSamFilePath());
-            VCFSimpleHeaderLine tumorSampleHeader = new VCFSimpleHeaderLine("SAMPLE", tumorSampleHeaderAttributes);
+            final VCFSimpleHeaderLine tumorSampleHeader = new VCFSimpleHeaderLine("SAMPLE", tumorSampleHeaderAttributes);

            sampleLines.add(normalSampleHeader);
            sampleLines.add(tumorSampleHeader);
@ -455,7 +510,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    private List<String> getOutputSampleNames(){
        if (printTCGAsampleHeader) {
         //Already checked for exactly 1 tumor and 1 normal in printTCGAsampleHeader assignment in initialize()
-            List<String> sampleNamePlaceholders = new ArrayList<>(2);
+            final List<String> sampleNamePlaceholders = new ArrayList<>(2);
            sampleNamePlaceholders.add("TUMOR");
            sampleNamePlaceholders.add("NORMAL");
            return sampleNamePlaceholders;
@ -466,14 +521,14 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    }

    @Override
-    public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+    public ActivityProfileState isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
        if( context == null || context.getBasePileup().isEmpty() )
            // if we don't have any data, just abort early
            return new ActivityProfileState(ref.getLocus(), 0.0);

        final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
-        AlignmentContext tumorContext = splitContexts.get(tumorSampleName);
-        AlignmentContext normalContext = splitContexts.get(normalSampleName);
+        final AlignmentContext tumorContext = splitContexts.get(tumorSampleName);
+        final AlignmentContext normalContext = splitContexts.get(normalSampleName);

        // if there are no tumor reads... there is no activity!
        if (tumorContext == null) {
@ -481,7 +536,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        }

        // KCIBUL -- this method was inlined and modified from ReferenceConfidenceModel
-        ReadBackedPileup tumorPileup = tumorContext.getBasePileup().getMappingFilteredPileup(MQthreshold);
+        final ReadBackedPileup tumorPileup = tumorContext.getBasePileup().getMappingFilteredPileup(MQthreshold);
        final double[] tumorGLs = calcGenotypeLikelihoodsOfRefVsAny(tumorPileup, ref.getBase(), MIN_BASE_QUALTY_SCORE);
        final double tumorLod = tumorGLs[1] - tumorGLs[0];

@ -495,7 +550,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
            // in any case, we have to handle the case where there is no normal (and thus no normal context) which is
            // different than having a normal but having no reads (where we should not enter the active region)
            if (normalSampleName != null && normalContext != null) {
-                int nonRefInNormal = getCountOfNonRefEvents(normalContext.getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE);
+                final int nonRefInNormal = getCountOfNonRefEvents(normalContext.getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE);

                final double[] normalGLs = calcGenotypeLikelihoodsOfRefVsAny(normalContext.getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, 0.5f);
                final double normalLod = normalGLs[0] - normalGLs[1];
@ -528,13 +583,12 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i

        // No reads here so nothing to do!
        if( originalActiveRegion.size() == 0 ) { return referenceModelForNoVariation(originalActiveRegion, true); }
-
        logReadInfo(DEBUG_READ_NAME, originalActiveRegion.getReads(), "Present in original active region");

        // create the assembly using just high quality reads (Q20 or higher).  We want to use lower
        // quality reads in the PairHMM (and especially in the normal) later, so we can't use a ReadFilter
-        ActiveRegion assemblyActiveRegion = new ActiveRegion(originalActiveRegion.getLocation(), originalActiveRegion.getSupportingStates(),originalActiveRegion.isActive(), getToolkit().getGenomeLocParser(), originalActiveRegion.getExtension());
-        for (GATKSAMRecord rec : originalActiveRegion.getReads()) {
+        final ActiveRegion assemblyActiveRegion = new ActiveRegion(originalActiveRegion.getLocation(), originalActiveRegion.getSupportingStates(),originalActiveRegion.isActive(), getToolkit().getGenomeLocParser(), originalActiveRegion.getExtension());
+        for (final GATKSAMRecord rec : originalActiveRegion.getReads()) {
            if (rec.getMappingQuality() >= MQthreshold ) {
                assemblyActiveRegion.add(rec);
            }
@ -558,18 +612,11 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        // Stop the trimming madness!!!
        if (!trimmingResult.isVariationPresent())
            return referenceModelForNoVariation(originalActiveRegion,false);
-
        logReadInfo(DEBUG_READ_NAME, trimmingResult.getCallableRegion().getReads(), "Present in trimming result");

        final AssemblyResultSet assemblyResult =
                trimmingResult.needsTrimming() ? untrimmedAssemblyResult.trimTo(trimmingResult.getCallableRegion()) : untrimmedAssemblyResult;

-//        final AssemblyResultSet assemblyResult = untrimmedAssemblyResult;
-
-        // after talking to Ryan -- they grab the reads out of the assembly (and trim then) to pass into the PairHMM
-        // because at one point they were trying error correcting of the reads based on the haplotypes.. but that is not
-        // working out, so it's safe for us just to take the reads
-//
        final ActiveRegion regionForGenotyping = assemblyResult.getRegionForGenotyping();
        logReadInfo(DEBUG_READ_NAME, regionForGenotyping.getReads(), "Present in region for genotyping");

@ -577,10 +624,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
            haplotypeBAMWriter.addDroppedReadsFromDelta(DroppedReadsTracker.Reason.TRIMMMED, originalActiveRegion.getReads(), regionForGenotyping.getReads());
        }

-//
-//        final ActiveRegion regionForGenotyping = trimmingResult.getCallableRegion();
-
-//        final ActiveRegion regionForGenotyping = originalActiveRegion;

        // filter out reads from genotyping which fail mapping quality based criteria
        //TODO - why don't do this before any assembly is done? Why not just once at the beginning of this method
@ -594,7 +637,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        }

        final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample(filteredReads);
-
        logReadInfo(DEBUG_READ_NAME, regionForGenotyping.getReads(), "Present in region for genotyping after filtering reads");

        // abort early if something is out of the acceptable range
@ -612,17 +654,17 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i

        final List<Haplotype> haplotypes = assemblyResult.getHaplotypeList();
        final Map<String,List<GATKSAMRecord>> reads = splitReadsBySample( regionForGenotyping.getReads() );
-        for (List<GATKSAMRecord> rec : reads.values()) {
+        for (final List<GATKSAMRecord> rec : reads.values()) {
            logReadInfo(DEBUG_READ_NAME, rec, "Present after splitting assemblyResult by sample");
        }

        final HashMap<String, Integer> ARreads_origNormalMQ = new HashMap<>();
-        for (GATKSAMRecord read : regionForGenotyping.getReads()) {
+        for (final GATKSAMRecord read : regionForGenotyping.getReads()) {
            ARreads_origNormalMQ.put(read.getReadName(), read.getMappingQuality());
        }

        // modify MAPQ scores in normal to be high so that we don't do any base quality score capping
-        for(GATKSAMRecord rec : regionForGenotyping.getReads()) {
+        for(final GATKSAMRecord rec : regionForGenotyping.getReads()) {
            if (isReadFromNormal(rec)) {
                rec.setMappingQuality(60);
            }
@ -647,8 +689,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        }

        readLikelihoods.changeReads(readRealignments);
-
-        for (GATKSAMRecord rec : readRealignments.keySet()) {
+        for (final GATKSAMRecord rec : readRealignments.keySet()) {
            logReadInfo(DEBUG_READ_NAME, rec, "Present after computing read likelihoods");
        }

@ -666,15 +707,8 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
                assemblyResult.getFullReferenceWithPadding(),
                assemblyResult.getPaddedReferenceLoc(),
                regionForGenotyping.getLocation(),
-                getToolkit().getGenomeLocParser(),
                metaDataTracker,
-                givenAlleles, false ,
-                tumorSampleName,
-                normalSampleName,
-                dbsnp.dbsnp,
-                cosmicRod,
-                DEBUG_READ_NAME
-                );
+                givenAlleles);

        if ( MTAC.bamWriter != null ) {
            final Set<Haplotype> calledHaplotypeSet = new HashSet<>(calledHaplotypes.getCalledHaplotypes());
@ -695,16 +729,16 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        if( MTAC.DEBUG ) { logger.info("----------------------------------------------------------------------------------"); }


-        List<VariantContext> annotatedCalls = new ArrayList<>();
-        int eventCount = calledHaplotypes.getCalls().size();
+        final List<VariantContext> annotatedCalls = new ArrayList<>();
+        final int eventCount = calledHaplotypes.getCalls().size();
        Integer minEventDistance = null;
        Integer maxEventDistance = null;
        Integer lastPosition = null;
-        for (VariantContext vc : calledHaplotypes.getCalls()) {
+        for (final VariantContext vc : calledHaplotypes.getCalls()) {
            if (lastPosition == null) {
                lastPosition = vc.getStart();
            } else {
-                int dist = Math.abs(vc.getStart() - lastPosition);
+                final int dist = Math.abs(vc.getStart() - lastPosition);
                if (maxEventDistance == null || dist > maxEventDistance) {
                    maxEventDistance = dist;
                }
@ -713,23 +747,23 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
                }
            }
        }
-        Map<String, Object> eventDistanceAttributes = new HashMap<>();
+        final Map<String, Object> eventDistanceAttributes = new HashMap<>();
        eventDistanceAttributes.put(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, eventCount);
        eventDistanceAttributes.put(GATKVCFConstants.EVENT_DISTANCE_MIN_KEY, minEventDistance);
        eventDistanceAttributes.put(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY, maxEventDistance);


        // can we do this with the Annotation classes instead?
-        for (VariantContext originalVC : calledHaplotypes.getCalls()) {
-            VariantContextBuilder vcb = new VariantContextBuilder(originalVC);
+        for (final VariantContext originalVC : calledHaplotypes.getCalls()) {
+            final VariantContextBuilder vcb = new VariantContextBuilder(originalVC);

-            Map<String, Object> attributes = new HashMap<>(originalVC.getAttributes());
+            final Map<String, Object> attributes = new HashMap<>(originalVC.getAttributes());
            attributes.putAll(eventDistanceAttributes);
            vcb.attributes(attributes);

-            Set<String> filters = new HashSet<>(originalVC.getFilters());
+            final Set<String> filters = new HashSet<>(originalVC.getFilters());

-            double tumorLod = originalVC.getAttributeAsDouble(GATKVCFConstants.TUMOR_LOD_KEY, -1);
+            final double tumorLod = originalVC.getAttributeAsDouble(GATKVCFConstants.TUMOR_LOD_KEY, -1);
            if (tumorLod < MTAC.TUMOR_LOD_THRESHOLD) {
                filters.add(GATKVCFConstants.TUMOR_LOD_FILTER_NAME);
            }
@ -739,22 +773,12 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
                 filters.addAll(calculateFilters(metaDataTracker, originalVC, eventDistanceAttributes));
            }

-            if (filters.size() > 0) {
-                vcb.filters(filters);
-            } else {
-                vcb.passFilters();
-            }
+            vcb.filters(filters.isEmpty() ? VariantContext.PASSES_FILTERS : filters);

            if (printTCGAsampleHeader) {
-                GenotypesContext genotypesWithBamSampleNames = originalVC.getGenotypes();
-                List<Genotype> renamedGenotypes = new ArrayList<>();
-                GenotypeBuilder GTbuilder = new GenotypeBuilder(genotypesWithBamSampleNames.get(tumorSampleName));
-                GTbuilder.name("TUMOR");
-                renamedGenotypes.add(GTbuilder.make());
-                GTbuilder = new GenotypeBuilder(genotypesWithBamSampleNames.get(normalSampleName));
-                GTbuilder.name("NORMAL");
-                renamedGenotypes.add(GTbuilder.make());
-                vcb.genotypes(renamedGenotypes);
+                final Genotype tumorGenotype = new GenotypeBuilder(originalVC.getGenotype(tumorSampleName)).name("TUMOR").make();
+                final Genotype normalGenotype = new GenotypeBuilder(originalVC.getGenotype(normalSampleName)).name("NORMAL").make();
+                vcb.genotypes(Arrays.asList(tumorGenotype, normalGenotype));
            }

            annotatedCalls.add(vcb.make());
@ -765,15 +789,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        return annotatedCalls;
    }

-    private Set<String> calculateFilters(RefMetaDataTracker metaDataTracker, VariantContext vc, Map<String, Object> eventDistanceAttributes) {
-        Set<String> filters = new HashSet<>();
+    private Set<String> calculateFilters(final RefMetaDataTracker metaDataTracker, final VariantContext vc, final Map<String, Object> eventDistanceAttributes) {
+        final Set<String> filters = new HashSet<>();

-        Integer eventCount = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY);
-        Integer maxEventDistance = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY);
+        final Integer eventCount = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY);
+        final Integer maxEventDistance = (Integer) eventDistanceAttributes.get(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY);

-        Collection<VariantContext> panelOfNormalsVC = metaDataTracker.getValues(normalPanelRod,
+        final Collection<VariantContext> panelOfNormalsVC = metaDataTracker.getValues(MTAC.normalPanelRod,
                getToolkit().getGenomeLocParser().createGenomeLoc(vc.getChr(), vc.getStart()));
-        VariantContext ponVc = panelOfNormalsVC.isEmpty()?null:panelOfNormalsVC.iterator().next();
+        final VariantContext ponVc = panelOfNormalsVC.isEmpty()?null:panelOfNormalsVC.iterator().next();

        if (ponVc != null) {
            filters.add(GATKVCFConstants.PON_FILTER_NAME);
@ -788,13 +812,13 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        double normalF = 0;
        int normalAltQualityScoreSum = 0;
        if (hasNormal()) {
-            Genotype normalGenotype = vc.getGenotype(normalSampleName);
+            final Genotype normalGenotype = vc.getGenotype(normalSampleName);

            // NOTE: how do we get the non-ref depth here?
            normalAltCounts = normalGenotype.getAD()[1];
            normalF = (Double) normalGenotype.getExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY);

-            Object qss = normalGenotype.getExtendedAttribute(GATKVCFConstants.QUALITY_SCORE_SUM_KEY);
+            final Object qss = normalGenotype.getExtendedAttribute(GATKVCFConstants.QUALITY_SCORE_SUM_KEY);
            if (qss != null) {
                normalAltQualityScoreSum = (Integer) ((Object[]) qss)[1];
            } else {
@ -819,11 +843,11 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        // such as ACTACTACT -> ACTACT, are overwhelmingly false positives so we
        // hard filter them out by default
        if (vc.isIndel()) {
-            ArrayList rpa = (ArrayList) vc.getAttribute(GATKVCFConstants.REPEATS_PER_ALLELE_KEY);
-            String ru = vc.getAttributeAsString(GATKVCFConstants.REPEAT_UNIT_KEY, "");
+            final ArrayList rpa = (ArrayList) vc.getAttribute(GATKVCFConstants.REPEATS_PER_ALLELE_KEY);
+            final String ru = vc.getAttributeAsString(GATKVCFConstants.REPEAT_UNIT_KEY, "");
            if (rpa != null && rpa.size() > 1 && ru.length() > 1) {
-                int refCount = (Integer) rpa.get(0);
-                int altCount = (Integer) rpa.get(1);
+                final int refCount = (Integer) rpa.get(0);
+                final int altCount = (Integer) rpa.get(1);

                if (refCount - altCount == 1) {
                    filters.add(GATKVCFConstants.STR_CONTRACTION_FILTER_NAME);
@ -840,10 +864,10 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i

        // clustered read position filter
        if (MTAC.ENABLE_CLUSTERED_READ_POSITION_FILTER){
-            Double tumorFwdPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_LEFT_OFFSET_KEY);
-            Double tumorRevPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_RIGHT_OFFSET_KEY);
-            Double tumorFwdPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_LEFT_OFFSET_KEY);
-            Double tumorRevPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_RIGHT_OFFSET_KEY);
+            final Double tumorFwdPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_LEFT_OFFSET_KEY);
+            final Double tumorRevPosMedian = (Double) vc.getAttribute(GATKVCFConstants.MEDIAN_RIGHT_OFFSET_KEY);
+            final Double tumorFwdPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_LEFT_OFFSET_KEY);
+            final Double tumorRevPosMAD = (Double) vc.getAttribute(GATKVCFConstants.MAD_MEDIAN_RIGHT_OFFSET_KEY);
            //If the variant is near the read end (median threshold) and the positions are very similar (MAD threshold) then filter
            if ( (tumorFwdPosMedian != null && tumorFwdPosMedian <= MTAC.PIR_MEDIAN_THRESHOLD && tumorFwdPosMAD != null && tumorFwdPosMAD <= MTAC.PIR_MAD_THRESHOLD) ||
                    (tumorRevPosMedian != null && tumorRevPosMedian <= MTAC.PIR_MEDIAN_THRESHOLD && tumorRevPosMAD != null && tumorRevPosMAD <= MTAC.PIR_MAD_THRESHOLD))
@ -866,14 +890,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
     */
    protected double[] calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final double f) {
        final double[] genotypeLikelihoods = new double[2];
-        int AA = 0, AB=1;
+        final int AA = 0;
+        final int AB=1;
        for( final PileupElement p : pileup ) {
            final byte qual = (p.isDeletion() ? REF_MODEL_DELETION_QUAL : p.getQual());
            if( p.isDeletion() || qual > minBaseQual ) {

                // TODO: why not use base qualities here?
                //double pobs = QualityUtils.qualToErrorProbLog10(qual);
-                double pobs = 1.0d - pow(10, (30 / -10.0));
+                final double pobs = 1.0d - pow(10, (30 / -10.0));
                if( isNonRef(refBase, p)) {
                    genotypeLikelihoods[AB] += Math.log10(f*pobs + (1-f)*pobs/3.0d);
                    genotypeLikelihoods[AA] += Math.log10((1-pobs)/3);
@ -905,7 +930,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    }

    protected double[] calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual) {
-        double f = calculateF(pileup, refBase, minBaseQual);
+        final double f = calculateF(pileup, refBase, minBaseQual);
        return calcGenotypeLikelihoodsOfRefVsAny(pileup, refBase, minBaseQual, f);
    }

@ -923,11 +948,10 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
                }
            }
        }
-        double f = (double) altCount / ((double) refCount + (double) altCount);
-        return f;
+        return (double) altCount / (refCount + altCount);
    }

-    private boolean isNonRef(byte refBase, PileupElement p) {
+    private boolean isNonRef(final byte refBase, final PileupElement p) {
        return p.getBase() != refBase || p.isDeletion() || p.isBeforeDeletionStart() || p.isAfterDeletionEnd() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip();
    }

@ -960,8 +984,8 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        return readsToRemove;
    }

-    private static GATKSAMRecord findReadByName(Collection<GATKSAMRecord> reads, String name) {
-        for(GATKSAMRecord read : reads) {
+    private static GATKSAMRecord findReadByName(final Collection<GATKSAMRecord> reads, final String name) {
+        for(final GATKSAMRecord read : reads) {
            if (name.equals(read.getReadName())) return read;
        }
        return null;
@ -1022,7 +1046,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    }

    @Override
-    public Integer reduce(List<VariantContext> callsInRegion, Integer numCalledRegions) {
+    public Integer reduce(final List<VariantContext> callsInRegion, final Integer numCalledRegions) {
        for( final VariantContext call : callsInRegion ) {
            vcfWriter.add( call );
        }
@ -1030,7 +1054,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    }

    @Override
-    public void onTraversalDone(Integer result) {
+    public void onTraversalDone(final Integer result) {
 //        if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it
 //        referenceConfidenceModel.close();
        //TODO remove the need to call close here for debugging, the likelihood output stream should be managed
@ -1045,114 +1069,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
    public boolean alwaysAppendDbsnpId() { return false; }

-    /**
-     * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
-     * dbSNP overlap is only used to require more evidence of absence in the normal if the variant in question has been seen before in germline.
-     */
-    @ArgumentCollection
-    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
-    public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
-
-    /**
-     * If a call overlaps with a record from the provided comp track, the INFO field will be annotated
-     *  as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
-     *  Records that are filtered in the comp track will be ignored.
-     *  Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
-     */
-    @Advanced
-    @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
-    public List<RodBinding<VariantContext>> comps = Collections.emptyList();
-    public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
-
-
-
-    /**
-     * Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
-     */
-    @Advanced
-    @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
-//    protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"ClippingRankSumTest", "DepthPerSampleHC"}));
-//    protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"DepthPerAlleleBySample", "BaseQualitySumPerAlleleBy ruSample", "TandemRepeatAnnotator",
-//        "RMSMappingQuality","MappingQualityRankSumTest","FisherStrand","StrandOddsRatio","ReadPosRankSumTest","QualByDepth", "Coverage"}));
-    protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"DepthPerAlleleBySample", "BaseQualitySumPerAlleleBySample", "TandemRepeatAnnotator", "OxoGReadCounts"}));
-
-    /**
-     * Which annotations to exclude from output in the VCF file.  Note that this argument has higher priority than the -A or -G arguments,
-     * so annotations will be excluded even if they are explicitly included with the other options.
-     */
-    @Advanced
-    @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
-    protected List<String> annotationsToExclude = new ArrayList<>(Arrays.asList(new String[]{"SpanningDeletions"}));
-
-    /**
-     * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
-     */
-    @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
-    //protected String[] annotationGroupsToUse = { StandardAnnotation.class.getSimpleName() };
-    protected String[] annotationClassesToUse = { };
-
-    /**
-     * A raw, unfiltered, highly sensitive callset in VCF format.
-     */
-    @Output(doc="File to which variants should be written")
-    protected VariantContextWriter vcfWriter = null;
-
-    /**
-     * Active region trimmer reference.
-     */
-    @ArgumentCollection
-    protected ActiveRegionTrimmer trimmer = new ActiveRegionTrimmer();
-
-    @Hidden
-    @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
-    protected String keepRG = null;
-
-
-
-
-    /**
-     * The minimum confidence needed for a given base for it to be used in variant calling.
-     */
-    @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
-    public byte MIN_BASE_QUALTY_SCORE = 10;
-
-
-
-// PAIR-HMM-Related Goodness
-
-//    public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.CONSERVATIVE;
-//    public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.AGGRESSIVE;
-    public PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL pcrErrorModel = PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.HOSTILE;
-
-    // Parameters to control read error correction
-    @Hidden
-    @Argument(fullName="errorCorrectReads", shortName="errorCorrectReads", doc = "Use an exploratory algorithm to error correct the kmers used during assembly.  May cause fundamental problems with the assembly graph itself", required=false)
-    protected boolean errorCorrectReads = false;
-
-    @Hidden
-    @Argument(fullName="captureAssemblyFailureBAM", shortName="captureAssemblyFailureBAM", doc="If specified, we will write a BAM called assemblyFailure.bam capturing all of the reads that were in the active region when the assembler failed for any reason", required = false)
-    protected boolean captureAssemblyFailureBAM = false;
-
-    @Advanced
-    @Argument(fullName="dontUseSoftClippedBases", shortName="dontUseSoftClippedBases", doc="If specified, we will not analyze soft clipped bases in the reads", required = false)
-    protected boolean dontUseSoftClippedBases = false;
-
-    @Hidden
-    @Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code.  Useful for benchmarking and scalability testing", required=false)
-    protected boolean justDetermineActiveRegions = false;
-
-
-
-
-    // reference base padding size
-    private static final int REFERENCE_PADDING = 500;
-
-    private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6;
-    private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument
-    private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument
-
-
-
    /**
     * High-level function that runs the assembler on the active region reads,
     * returning a data structure with the resulting information needed
@ -1224,14 +1140,11 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
            if( !clippedRead.isEmpty() && clippedRead.getCigar().getReadLength() > 0 ) {
                clippedRead = ReadClipper.hardClipToRegion(clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop());
                if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) {
-                    //logger.info("Keeping read " + clippedRead + " start " + clippedRead.getAlignmentStart() + " end " + clippedRead.getAlignmentEnd());
                    readsToUse.add(clippedRead);
                }
            }
        }

-        // TODO -- Performance optimization: we partition the reads by sample 4 times right now; let's unify that code.
-
        final List<GATKSAMRecord> downsampledReads = DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart);

        if ( MTAC.bamWriter != null && MTAC.emitDroppedReads ) {
@ -1268,9 +1181,9 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
     *
     * @param reads the list of reads to consider
     */
-    private void cleanOverlappingReadPairs(final List<GATKSAMRecord> reads, Set<String> normalSampleNames) {
-        Map<String, List<GATKSAMRecord>> data = splitReadsBySample(reads);
-        for ( String sampleName : data.keySet() ) {
+    private void cleanOverlappingReadPairs(final List<GATKSAMRecord> reads, final Set<String> normalSampleNames) {
+        final Map<String, List<GATKSAMRecord>> data = splitReadsBySample(reads);
+        for ( final String sampleName : data.keySet() ) {
            final boolean isTumor = !normalSampleNames.contains(sampleName);
            final List<GATKSAMRecord> perSampleReadList = data.get(sampleName);

@ -1284,16 +1197,15 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        }
    }

-    public static void logReadInfo(String readName, Collection<GATKSAMRecord> records, String message) {
+    public static void logReadInfo(final String readName, final Collection<GATKSAMRecord> records, final String message) {
        if (readName != null) {
-            for (GATKSAMRecord rec : records) {
+            for (final GATKSAMRecord rec : records) {
                logReadInfo(readName, rec, message);
            }
-
        }
    }

-    public static void logReadInfo(String readName, GATKSAMRecord rec, String message) {
+    public static void logReadInfo(final String readName, final GATKSAMRecord rec, final String message) {
        if (readName != null && rec != null && readName.equals(rec.getReadName())) {
            logger.info("Found " + rec.toString() + " - " + message);
        }
@ -1322,7 +1234,7 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
        return result;
    }

-    private boolean isReadFromNormal(GATKSAMRecord rec) {
+    private boolean isReadFromNormal(final GATKSAMRecord rec) {
        return normalSampleName != null && normalSampleName.equals(rec.getReadGroup().getSample());

    }
@ -1338,7 +1250,6 @@ public class MuTect2 extends ActiveRegionWalker<List<VariantContext>, Integer> i
    @Advanced
    @Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="Disable physical phasing", required = false)
    protected boolean doNotRunPhysicalPhasing = false;
-
 }


--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/SomaticGenotypingEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/SomaticGenotypingEngine.java
@ -54,9 +54,11 @@ package org.broadinstitute.gatk.tools.walkers.cancer.m2;
 import com.google.java.contract.Ensures;
 import htsjdk.samtools.util.StringUtil;
 import htsjdk.variant.variantcontext.*;
+import org.apache.commons.collections.ListUtils;
 import org.apache.commons.lang.mutable.MutableDouble;
 import org.apache.commons.lang.mutable.MutableInt;
 import org.apache.log4j.Logger;
+import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
 import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
 import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCallerGenotypingEngine;
 import org.broadinstitute.gatk.utils.GenomeLoc;
@ -78,16 +80,33 @@ import java.util.*;

 public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {

-    protected final M2ArgumentCollection MTAC;
+    private final M2ArgumentCollection MTAC;
    private final TumorPowerCalculator strandArtifactPowerCalculator;
-    final boolean REF_AND_ALT = false;
-    final boolean ALT_ONLY = true;
+
+    private final String tumorSampleName;
+    private final String matchedNormalSampleName;
+    private final String DEBUG_READ_NAME;
+
+    // {@link GenotypingEngine} requires a non-null {@link AFCalculatorProvider} but this class doesn't need it.  Thus we make a dummy
+    private static AFCalculatorProvider DUMMY_AF_CALCULATOR_PROVIDER = new AFCalculatorProvider() {
+        public AFCalculator getInstance(final int ploidy, final int maximumAltAlleles) { return null; }
+    };

    private final static Logger logger = Logger.getLogger(SomaticGenotypingEngine.class);

-    public SomaticGenotypingEngine(final M2ArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider, final boolean doPhysicalPhasing, final M2ArgumentCollection MTAC) {
-        super(configuration, samples, genomeLocParser, afCalculatorProvider, doPhysicalPhasing);
+    public SomaticGenotypingEngine(final M2ArgumentCollection configuration,
+                                   final SampleList samples,
+                                   final GenomeLocParser genomeLocParser,
+                                   final boolean doPhysicalPhasing,
+                                   final M2ArgumentCollection MTAC,
+                                   final String tumorSampleName,
+                                   final String matchedNormalSampleName,
+                                   final String DEBUG_READ_NAME) {
+        super(configuration, samples, genomeLocParser, DUMMY_AF_CALCULATOR_PROVIDER, doPhysicalPhasing);
        this.MTAC = MTAC;
+        this.tumorSampleName = tumorSampleName;
+        this.matchedNormalSampleName = matchedNormalSampleName;
+        this.DEBUG_READ_NAME = DEBUG_READ_NAME;
        // coverage related initialization
        final double errorProbability = Math.pow(10, -(MTAC.POWER_CONSTANT_QSCORE/10));
        strandArtifactPowerCalculator = new TumorPowerCalculator(errorProbability, MTAC.STRAND_ARTIFACT_LOD_THRESHOLD, 0.0f);
@ -105,37 +124,22 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
     * @param ref                                    Reference bytes at active region
     * @param refLoc                                 Corresponding active region genome location
     * @param activeRegionWindow                     Active window
-     * @param genomeLocParser                        GenomeLocParser
     * @param activeAllelesToGenotype                Alleles to genotype
-     * @param emitReferenceConfidence whether we should add a &lt;NON_REF&gt; alternative allele to the result variation contexts.
     *
     * @return                                       A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
     *
     */
-//    @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
-    @Ensures("result != null")
    // TODO - can this be refactored? this is hard to follow!
    public CalledHaplotypes callMutations (
-                                                       final List<Haplotype> haplotypes,
-                                                       //final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
-                                                       final ReadLikelihoods<Haplotype> readLikelihoods,
-                                                       final Map<String, Integer> originalNormalReadQualities,
-                                                       final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
-                                                       final byte[] ref,
-                                                       final GenomeLoc refLoc,
-                                                       final GenomeLoc activeRegionWindow,
-                                                       final GenomeLocParser genomeLocParser,
-                                                       final RefMetaDataTracker tracker,
-                                                       final List<VariantContext> activeAllelesToGenotype,
-                                                       final boolean emitReferenceConfidence,
-                                                       final String tumorSampleName,
-                                                       final String matchedNormalSampleName,
-                                                       final RodBinding<VariantContext> dbsnpRod,
-                                                       final List<RodBinding<VariantContext>> cosmicRod,
-                                                       final String DEBUG_READ_NAME
-
-    ) {
-
+            final List<Haplotype> haplotypes,
+            final ReadLikelihoods<Haplotype> readLikelihoods,
+            final Map<String, Integer> originalNormalReadQualities,
+            final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
+            final byte[] ref,
+            final GenomeLoc refLoc,
+            final GenomeLoc activeRegionWindow,
+            final RefMetaDataTracker tracker,
+            final List<VariantContext> activeAllelesToGenotype) {
        // sanity check input arguments
        if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
        if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods);
@ -143,8 +147,6 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
        if (refLoc == null || refLoc.size() != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
        if (activeRegionWindow == null ) throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow);
        if (activeAllelesToGenotype == null ) throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype);
-        if (genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser must be non-null, got "+genomeLocParser);
-

        // Somatic Tumor/Normal Sample Handling
        verifySamplePresence(tumorSampleName, readLikelihoods.samples());
@ -159,293 +161,217 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
        final List<VariantContext> returnCalls = new ArrayList<>();

        for( final int loc : startPosKeySet ) {
-            if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
-                final List<VariantContext> eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype);
-
-                if( eventsAtThisLoc.isEmpty() ) { continue; }
-
-                // Create the event mapping object which maps the original haplotype events to the events present at just this locus
-                final Map<Event, List<Haplotype>> eventMapper = createEventMapper(loc, eventsAtThisLoc, haplotypes);
-
-                // Sanity check the priority list for mistakes
-                final List<String> priorityList = makePriorityList(eventsAtThisLoc);
-
-                // Merge the event to find a common reference representation
-
-                VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList,
-                        GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
-                        GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
-
-                if( mergedVC == null ) { continue; }
-
-                if (emitReferenceConfidence)
-                    mergedVC = addNonRefSymbolicAllele(mergedVC);
-
-                final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
-                mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
-                for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
-                    mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
-                }
-
-                final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
-
-                if( configuration.DEBUG && logger != null ) {
-                    if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
-                }
-
-                ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
-
-                //LDG: do we want to do this before or after pulling out overlapping reads?
-                if (MTAC.isSampleContaminationPresent())
-                    readAlleleLikelihoods.contaminationDownsampling(MTAC.getSampleContamination());
-
-                //if (!mergedVC.isBiallelic()) {
-                //    logger.info("[UNSUPPORTED] Detected non-Biallelic VC" + mergedVC.toString());
-                //    continue;
-                //}
-
-                // TODO: once tests are passing, refactor to use the new data structure (not the deprecated one)
-                // handle overlapping fragments
-                // TODO: CONFIRM WITH GSA IF IT IS OK TO REMOVE READS FROM THE PRALM (should be... they do it in filterPoorlyModeledReads!)
-                PerReadAlleleLikelihoodMap tumorPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(tumorSampleName));
-                filterPRALMForOverlappingReads(tumorPRALM, mergedVC.getReference(), loc, false);
-                MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in Tumor PRALM after filtering for overlapping reads");
-                // extend to multiple samples
-
-                // compute tumor LOD for each alternate allele
-                // TODO: somewhere we have to ensure that the all the alleles in the variant context is in alleleFractions passed to getHetGenotypeLogLikelihoods. getHetGenotypeLogLikelihoods will not check that for you
-                final PerAlleleCollection<Double> altAlleleFractions = estimateAlleleFraction(mergedVC, tumorPRALM, false);
-                final PerAlleleCollection<Double> tumorHetGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, tumorPRALM, originalNormalReadQualities, altAlleleFractions);
-
-                if( configuration.DEBUG && logger != null ) {
-                    StringBuilder outputSB = new StringBuilder("Calculated allelic fraction at " + loc + " = [");
-                    for (Allele allele : altAlleleFractions.getAltAlleles()){
-                        outputSB.append( allele + ": " + altAlleleFractions.getAlt(allele) + ", ");
-                    }
-                    outputSB.append("]");
-                    logger.info(outputSB.toString());
-                }
-
-                final PerAlleleCollection<Double> tumorLods = PerAlleleCollection.createPerAltAlleleCollection();
-                for (Allele altAllele : mergedVC.getAlternateAlleles()){
-                    tumorLods.set(altAllele, tumorHetGenotypeLLs.get(altAllele) - tumorHetGenotypeLLs.getRef());
-                }
-
-                if (configuration.DEBUG && logger != null) {
-                    StringBuilder outputSB = new StringBuilder("Tumor LOD at " + loc + " = [");
-                    for (Allele altAllele : tumorLods.getAltAlleles()) {
-                        outputSB.append( altAllele + ": " + tumorLods.getAlt(altAllele) + ", ");
-                    }
-                    outputSB.append("]");
-                    logger.info(outputSB.toString());
-                }
-
-                double INIT_NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
-                double NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
-                PerReadAlleleLikelihoodMap normalPRALM = null;
-                PerAlleleCollection<Double> normalLods = PerAlleleCollection.createPerAltAlleleCollection();
-
-                // if normal bam is available, compute normal LOD
-                if (hasNormal) {
-                    normalPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(matchedNormalSampleName));
-                    filterPRALMForOverlappingReads(normalPRALM, mergedVC.getReference(), loc, true);
-                    MuTect2.logReadInfo(DEBUG_READ_NAME, normalPRALM.getLikelihoodReadMap().keySet(), "Present after in Nomral PRALM filtering for overlapping reads");
-
-                    GenomeLoc eventGenomeLoc = genomeLocParser.createGenomeLoc(activeRegionWindow.getContig(), loc);
-                    Collection<VariantContext> cosmicVC = tracker.getValues(cosmicRod, eventGenomeLoc);
-                    Collection<VariantContext> dbsnpVC = tracker.getValues(dbsnpRod, eventGenomeLoc);
-                    // remove the effect of cosmic from dbSNP
-                    final boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty());
-
-                    INIT_NORMAL_LOD_THRESHOLD = MTAC.INITIAL_NORMAL_LOD_THRESHOLD; //only set this if this job has a normal
-                    NORMAL_LOD_THRESHOLD = (germlineAtRisk)?MTAC.NORMAL_DBSNP_LOD_THRESHOLD:MTAC.NORMAL_LOD_THRESHOLD;
-
-
-                    // compute normal LOD = LL(X|REF)/LL(X|ALT) where ALT is the diploid HET with AF = 0.5
-                    // note normal LOD is REF over ALT, the reciprocal of the tumor LOD
-                    final PerAlleleCollection<Double> diploidHetAlleleFractions = PerAlleleCollection.createPerRefAndAltAlleleCollection();
-                    for (final Allele allele : mergedVC.getAlternateAlleles()){
-                        diploidHetAlleleFractions.setAlt(allele, 0.5);
-                    }
-
-                    final PerAlleleCollection<Double> normalGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, normalPRALM, originalNormalReadQualities, diploidHetAlleleFractions);
-
-                    for (final Allele altAllele : mergedVC.getAlternateAlleles()){
-                        normalLods.setAlt(altAllele, normalGenotypeLLs.getRef() - normalGenotypeLLs.getAlt(altAllele));
-                    }
-
-                }
-
-                int numPassingAlts = 0;
-                Set<Allele> allelesThatPassThreshold = new HashSet<>();
-                Allele alleleWithHighestTumorLOD = null;
-
-                // TODO: use lambda
-                for (Allele altAllele : mergedVC.getAlternateAlleles()) {
-                    final boolean passesTumorLodThreshold = tumorLods.getAlt(altAllele) >= MTAC.INITIAL_TUMOR_LOD_THRESHOLD;
-                    final boolean passesNormalLodThreshold = hasNormal ? normalLods.getAlt(altAllele) >= INIT_NORMAL_LOD_THRESHOLD : true;
-                    if (passesTumorLodThreshold && passesNormalLodThreshold) {
-                        numPassingAlts++;
-                        allelesThatPassThreshold.add(altAllele);
-                        if (alleleWithHighestTumorLOD == null
-                                || tumorLods.getAlt(altAllele) > tumorLods.getAlt(alleleWithHighestTumorLOD)){
-                            alleleWithHighestTumorLOD = altAllele;
-                        }
-                    }
-                }
-
-                final boolean emitVariant = numPassingAlts > 0;
-
-                VariantContext call = null;
-                if (emitVariant) {
-                    VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC);
-                    // FIXME: can simply get first alternate since above we only deal with Bi-allelic sites...
-                    int haplotypeCount = alleleMapper.get(mergedVC.getAlternateAllele(0)).size();
-                    callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
-                    callVcb.attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorLods.getAlt(alleleWithHighestTumorLOD));
-
-                    if (hasNormal) {
-                        callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, normalLods.getAlt(alleleWithHighestTumorLOD));
-                        if (normalLods.getAlt(alleleWithHighestTumorLOD) < NORMAL_LOD_THRESHOLD) {
-                            callVcb.filter(GATKVCFConstants.GERMLINE_RISK_FILTER_NAME);
-                        }
-                    }
-
-                    // M1-style strand artifact filter
-                    // TODO: move code to MuTect2::calculateFilters()
-                    // skip if VC has multiple alleles - it will get filtered later anyway
-                    if (MTAC.ENABLE_STRAND_ARTIFACT_FILTER && numPassingAlts == 1) {
-                        final PerReadAlleleLikelihoodMap forwardPRALM = new PerReadAlleleLikelihoodMap();
-                        final PerReadAlleleLikelihoodMap reversePRALM = new PerReadAlleleLikelihoodMap();
-                        splitPRALMintoForwardAndReverseReads(tumorPRALM, forwardPRALM, reversePRALM);
-
-                        MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in tumor PRALM after PRALM is split");
-                        MuTect2.logReadInfo(DEBUG_READ_NAME, forwardPRALM.getLikelihoodReadMap().keySet(), "Present in forward PRALM after PRALM is split");
-                        MuTect2.logReadInfo(DEBUG_READ_NAME, reversePRALM.getLikelihoodReadMap().keySet(), "Present in reverse PRALM after PRALM is split");
-
-                        // TODO: build a new type for probability, likelihood, and log_likelihood. e.g. f_fwd :: probability[], tumorGLs_fwd :: likelihood[]
-                        // TODO: don't want to call getHetGenotypeLogLikelihoods on more than one alternate alelle. May need to overload it to take a scalar f_fwd.
-                        final PerAlleleCollection<Double> alleleFractionsForward = estimateAlleleFraction(mergedVC, forwardPRALM, true);
-                        final PerAlleleCollection<Double> tumorGenotypeLLForward = getHetGenotypeLogLikelihoods(mergedVC, forwardPRALM, originalNormalReadQualities, alleleFractionsForward);
-
-                        final PerAlleleCollection<Double> alleleFractionsReverse = estimateAlleleFraction(mergedVC, reversePRALM, true);
-                        final PerAlleleCollection<Double> tumorGenotypeLLReverse = getHetGenotypeLogLikelihoods(mergedVC, reversePRALM, originalNormalReadQualities, alleleFractionsReverse);
-
-                        if( configuration.DEBUG && logger != null ) {
-                            StringBuilder forwardMessage = new StringBuilder("Calculated forward allelic fraction at " + loc + " = [");
-                            StringBuilder reverseMessage = new StringBuilder("Calculated reverse allelic fraction at " + loc + " = [");
-
-                            for (Allele altAllele : altAlleleFractions.getAltAlleles()){
-                                forwardMessage.append( altAllele + ": " + alleleFractionsForward.getAlt(altAllele) + ", ");
-                                reverseMessage.append( altAllele + ": " + alleleFractionsReverse.getAlt(altAllele) + ", ");
-                            }
-
-                            forwardMessage.append("]");
-                            reverseMessage.append("]");
-
-                            logger.info(forwardMessage.toString());
-                            logger.info(reverseMessage.toString());
-                        }
-
-                        double tumorLod_fwd = tumorGenotypeLLForward.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLForward.getRef();
-                        double tumorLod_rev = tumorGenotypeLLReverse.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLReverse.getRef();
-
-                        double tumorSBpower_fwd = 0.0;
-                        double tumorSBpower_rev = 0.0;
-                        try {
-                            // Note that we use the observed combined (+ and -) allele fraction for power calculation in either direction
-                            tumorSBpower_fwd = strandArtifactPowerCalculator.cachedPowerCalculation(forwardPRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
-                            tumorSBpower_rev = strandArtifactPowerCalculator.cachedPowerCalculation(reversePRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
-                        }
-                        catch (Throwable t) {
-                            System.err.println("Error processing " + activeRegionWindow.getContig() + ":" + loc);
-                            t.printStackTrace(System.err);
-                            throw new RuntimeException(t);
-                        }
-
-                        callVcb.attribute(GATKVCFConstants.TLOD_FWD_KEY, tumorLod_fwd);
-                        callVcb.attribute(GATKVCFConstants.TLOD_REV_KEY, tumorLod_rev);
-                        callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_FWD_KEY, tumorSBpower_fwd);
-                        callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_REV_KEY, tumorSBpower_rev);
-                        // TODO: add vcf INFO fields. see callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
-
-                        if ((tumorSBpower_fwd > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_fwd < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD) ||
-                                (tumorSBpower_rev > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_rev < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD))
-                            callVcb.filter(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
-                    }
-
-                    // TODO: this probably belongs in M2::calculateFilters()
-                    if (numPassingAlts > 1) {
-                        callVcb.filter(GATKVCFConstants.TRIALLELIC_SITE_FILTER_NAME);
-                    }
-
-                    // build genotypes TODO: this part needs review and refactor
-                    List<Allele> tumorAlleles = new ArrayList<>();
-                    tumorAlleles.add(mergedVC.getReference());
-                    tumorAlleles.add(alleleWithHighestTumorLOD);
-                    Genotype tumorGenotype = new GenotypeBuilder(tumorSampleName, tumorAlleles)
-                            .attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, altAlleleFractions.getAlt(alleleWithHighestTumorLOD))
-                            .make(); // TODO: add ADs?
-                    List<Genotype> genotypes = new ArrayList<>();
-                    genotypes.add(tumorGenotype);
-
-                    // We assume that the genotype in the normal is 0/0
-                    // TODO: is normal always homozygous reference?
-                    List<Allele> homRefAllelesforNormalGenotype = new ArrayList<>();
-                    homRefAllelesforNormalGenotype.addAll(Collections.nCopies(2, mergedVC.getReference()));
-
-                    // if we are calling with a normal, build the genotype for the sample to appear in vcf
-                    int REF = 0, ALT = 1;
-                    if (hasNormal) {
-                        PerAlleleCollection<Integer> normalCounts = getRefAltCount(mergedVC, normalPRALM, false);
-                        final int normalRefAlleleDepth = normalCounts.getRef();
-                        final int normalAltAlleleDepth = normalCounts.getAlt(alleleWithHighestTumorLOD);
-                        final int[] normalAlleleDepths = { normalRefAlleleDepth, normalAltAlleleDepth };
-                        final double normalAlleleFraction = (double) normalAltAlleleDepth / ( normalRefAlleleDepth + normalAltAlleleDepth);
-
-                        final Genotype normalGenotype = new GenotypeBuilder(matchedNormalSampleName, homRefAllelesforNormalGenotype)
-                                .AD(normalAlleleDepths)
-                                .attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, normalAlleleFraction)
-                                .make();
-                        genotypes.add(normalGenotype);
-                    }
-
-                    //only use alleles found in the tumor (
-                    call = new VariantContextBuilder(callVcb).alleles(tumorAlleles).genotypes(genotypes).make();
-
-                }
-
-                // how should we be making use of _perSampleFilteredReadList_?
-                if( call != null ) {
-                    readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
-                            genomeLocParser, emitReferenceConfidence, alleleMapper, readAlleleLikelihoods, call);
-
-                    ReferenceContext referenceContext = new ReferenceContext(genomeLocParser, genomeLocParser.createGenomeLoc(mergedVC.getChr(), mergedVC.getStart(), mergedVC.getEnd()), refLoc, ref);
-                    VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker, readAlleleLikelihoods, call, false);
-
-                    if( call.getAlleles().size() != mergedVC.getAlleles().size() )
-                        annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
-
-                    // maintain the set of all called haplotypes
-                    for ( final Allele calledAllele : call.getAlleles() ) {
-                        final List<Haplotype> haplotypeList = alleleMapper.get(calledAllele);
-                        if (haplotypeList == null) continue;
-                        calledHaplotypes.addAll(haplotypeList);
-                    }
-
-                    returnCalls.add( annotatedCall );
-                }
-
+            if( loc < activeRegionWindow.getStart() || loc > activeRegionWindow.getStop() ) {
+                continue;
            }
+            final List<VariantContext> eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype);
+
+            if( eventsAtThisLoc.isEmpty() ) { continue; }
+
+            // Create the event mapping object which maps the original haplotype events to the events present at just this locus
+            final Map<Event, List<Haplotype>> eventMapper = createEventMapper(loc, eventsAtThisLoc, haplotypes);
+
+            final List<String> priorityList = makePriorityList(eventsAtThisLoc);
+
+            VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList,
+                    GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
+                    GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
+
+            if( mergedVC == null ) { continue; }
+
+            final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
+            mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
+            for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
+                mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
+            }
+
+            final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
+
+            ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
+
+            //LDG: do we want to do this before or after pulling out overlapping reads?
+            if (MTAC.isSampleContaminationPresent())
+                readAlleleLikelihoods.contaminationDownsampling(MTAC.getSampleContamination());
+
+            final PerReadAlleleLikelihoodMap tumorPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(tumorSampleName));
+            filterPRALMForOverlappingReads(tumorPRALM, mergedVC.getReference(), loc, false);
+            MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in Tumor PRALM after filtering for overlapping reads");
+            // extend to multiple samples
+
+            // compute tumor LOD for each alternate allele
+            // TODO: somewhere we have to ensure that the all the alleles in the variant context is in alleleFractions passed to getHetGenotypeLogLikelihoods. getHetGenotypeLogLikelihoods will not check that for you
+            final PerAlleleCollection<Double> altAlleleFractions = estimateAlleleFraction(mergedVC, tumorPRALM, false);
+            final PerAlleleCollection<Double> tumorHetGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, tumorPRALM, originalNormalReadQualities, altAlleleFractions);
+
+            final PerAlleleCollection<Double> tumorLods = PerAlleleCollection.createPerAltAlleleCollection();
+            for (final Allele altAllele : mergedVC.getAlternateAlleles()){
+                tumorLods.set(altAllele, tumorHetGenotypeLLs.get(altAllele) - tumorHetGenotypeLLs.getRef());
+            }
+
+            double INIT_NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
+            double NORMAL_LOD_THRESHOLD = -Double.MAX_VALUE;
+            PerReadAlleleLikelihoodMap normalPRALM = null;
+            final PerAlleleCollection<Double> normalLods = PerAlleleCollection.createPerAltAlleleCollection();
+
+            // if normal bam is available, compute normal LOD
+            if (hasNormal) {
+                normalPRALM = readAlleleLikelihoods.toPerReadAlleleLikelihoodMap(readAlleleLikelihoods.sampleIndex(matchedNormalSampleName));
+                filterPRALMForOverlappingReads(normalPRALM, mergedVC.getReference(), loc, true);
+                MuTect2.logReadInfo(DEBUG_READ_NAME, normalPRALM.getLikelihoodReadMap().keySet(), "Present after in Nomral PRALM filtering for overlapping reads");
+
+                final GenomeLoc eventGenomeLoc = genomeLocParser.createGenomeLoc(activeRegionWindow.getContig(), loc);
+                final Collection<VariantContext> cosmicVC = tracker.getValues(MTAC.cosmicRod, eventGenomeLoc);
+                final Collection<VariantContext> dbsnpVC = tracker.getValues(MTAC.dbsnp.dbsnp, eventGenomeLoc);
+                // remove the effect of cosmic from dbSNP
+                final boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty());
+
+                INIT_NORMAL_LOD_THRESHOLD = MTAC.INITIAL_NORMAL_LOD_THRESHOLD; //only set this if this job has a normal
+                NORMAL_LOD_THRESHOLD = (germlineAtRisk)?MTAC.NORMAL_DBSNP_LOD_THRESHOLD:MTAC.NORMAL_LOD_THRESHOLD;
+
+                // compute normal LOD = LL(X|REF)/LL(X|ALT) where ALT is the diploid HET with AF = 0.5
+                // note normal LOD is REF over ALT, the reciprocal of the tumor LOD
+                final PerAlleleCollection<Double> diploidHetAlleleFractions = PerAlleleCollection.createPerRefAndAltAlleleCollection();
+                for (final Allele allele : mergedVC.getAlternateAlleles()){
+                    diploidHetAlleleFractions.setAlt(allele, 0.5);
+                }
+
+                final PerAlleleCollection<Double> normalGenotypeLLs = getHetGenotypeLogLikelihoods(mergedVC, normalPRALM, originalNormalReadQualities, diploidHetAlleleFractions);
+
+                for (final Allele altAllele : mergedVC.getAlternateAlleles()){
+                    normalLods.setAlt(altAllele, normalGenotypeLLs.getRef() - normalGenotypeLLs.getAlt(altAllele));
+                }
+            }
+
+            int numPassingAlts = 0;
+            final Set<Allele> allelesThatPassThreshold = new HashSet<>();
+            Allele alleleWithHighestTumorLOD = null;
+
+            // TODO: use lambda
+            for (final Allele altAllele : mergedVC.getAlternateAlleles()) {
+                final boolean passesTumorLodThreshold = tumorLods.getAlt(altAllele) >= MTAC.INITIAL_TUMOR_LOD_THRESHOLD;
+                final boolean passesNormalLodThreshold = hasNormal ? normalLods.getAlt(altAllele) >= INIT_NORMAL_LOD_THRESHOLD : true;
+                if (passesTumorLodThreshold && passesNormalLodThreshold) {
+                    numPassingAlts++;
+                    allelesThatPassThreshold.add(altAllele);
+                    if (alleleWithHighestTumorLOD == null
+                            || tumorLods.getAlt(altAllele) > tumorLods.getAlt(alleleWithHighestTumorLOD)){
+                        alleleWithHighestTumorLOD = altAllele;
+                    }
+                }
+            }
+            if (numPassingAlts == 0) {
+                continue;
+            }
+
+
+            final VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC);
+            // FIXME: can simply get first alternate since above we only deal with Bi-allelic sites...
+            final int haplotypeCount = alleleMapper.get(mergedVC.getAlternateAllele(0)).size();
+            callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
+            callVcb.attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorLods.getAlt(alleleWithHighestTumorLOD));
+
+            if (hasNormal) {
+                callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, normalLods.getAlt(alleleWithHighestTumorLOD));
+                if (normalLods.getAlt(alleleWithHighestTumorLOD) < NORMAL_LOD_THRESHOLD) {
+                    callVcb.filter(GATKVCFConstants.GERMLINE_RISK_FILTER_NAME);
+                }
+            }
+
+            // M1-style strand artifact filter
+            // TODO: move code to MuTect2::calculateFilters()
+            // skip if VC has multiple alleles - it will get filtered later anyway
+            if (MTAC.ENABLE_STRAND_ARTIFACT_FILTER && numPassingAlts == 1) {
+                final PerReadAlleleLikelihoodMap forwardPRALM = new PerReadAlleleLikelihoodMap();
+                final PerReadAlleleLikelihoodMap reversePRALM = new PerReadAlleleLikelihoodMap();
+                splitPRALMintoForwardAndReverseReads(tumorPRALM, forwardPRALM, reversePRALM);
+
+                MuTect2.logReadInfo(DEBUG_READ_NAME, tumorPRALM.getLikelihoodReadMap().keySet(), "Present in tumor PRALM after PRALM is split");
+                MuTect2.logReadInfo(DEBUG_READ_NAME, forwardPRALM.getLikelihoodReadMap().keySet(), "Present in forward PRALM after PRALM is split");
+                MuTect2.logReadInfo(DEBUG_READ_NAME, reversePRALM.getLikelihoodReadMap().keySet(), "Present in reverse PRALM after PRALM is split");
+
+                // TODO: build a new type for probability, likelihood, and log_likelihood. e.g. f_fwd :: probability[], tumorGLs_fwd :: likelihood[]
+                // TODO: don't want to call getHetGenotypeLogLikelihoods on more than one alternate alelle. May need to overload it to take a scalar f_fwd.
+                final PerAlleleCollection<Double> alleleFractionsForward = estimateAlleleFraction(mergedVC, forwardPRALM, true);
+                final PerAlleleCollection<Double> tumorGenotypeLLForward = getHetGenotypeLogLikelihoods(mergedVC, forwardPRALM, originalNormalReadQualities, alleleFractionsForward);
+
+                final PerAlleleCollection<Double> alleleFractionsReverse = estimateAlleleFraction(mergedVC, reversePRALM, true);
+                final PerAlleleCollection<Double> tumorGenotypeLLReverse = getHetGenotypeLogLikelihoods(mergedVC, reversePRALM, originalNormalReadQualities, alleleFractionsReverse);
+
+                final double tumorLod_fwd = tumorGenotypeLLForward.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLForward.getRef();
+                final double tumorLod_rev = tumorGenotypeLLReverse.getAlt(alleleWithHighestTumorLOD) - tumorGenotypeLLReverse.getRef();
+
+                // Note that we use the observed combined (+ and -) allele fraction for power calculation in either direction
+                final double tumorSBpower_fwd = strandArtifactPowerCalculator.cachedPowerCalculation(forwardPRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
+                final double tumorSBpower_rev = strandArtifactPowerCalculator.cachedPowerCalculation(reversePRALM.getNumberOfStoredElements(), altAlleleFractions.getAlt(alleleWithHighestTumorLOD));
+
+
+                callVcb.attribute(GATKVCFConstants.TLOD_FWD_KEY, tumorLod_fwd);
+                callVcb.attribute(GATKVCFConstants.TLOD_REV_KEY, tumorLod_rev);
+                callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_FWD_KEY, tumorSBpower_fwd);
+                callVcb.attribute(GATKVCFConstants.TUMOR_SB_POWER_REV_KEY, tumorSBpower_rev);
+                // TODO: add vcf INFO fields. see callVcb.attribute(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, haplotypeCount);
+
+                if ((tumorSBpower_fwd > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_fwd < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD) ||
+                        (tumorSBpower_rev > MTAC.STRAND_ARTIFACT_POWER_THRESHOLD && tumorLod_rev < MTAC.STRAND_ARTIFACT_LOD_THRESHOLD))
+                    callVcb.filter(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
+            }
+
+            // TODO: this probably belongs in M2::calculateFilters()
+            if (numPassingAlts > 1) {
+                callVcb.filter(GATKVCFConstants.TRIALLELIC_SITE_FILTER_NAME);
+            }
+
+            // build genotypes TODO: this part needs review and refactor
+            final List<Allele> tumorAlleles = Arrays.asList(mergedVC.getReference(), alleleWithHighestTumorLOD);
+            final Genotype tumorGenotype = new GenotypeBuilder(tumorSampleName, tumorAlleles)
+                    .attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, altAlleleFractions.getAlt(alleleWithHighestTumorLOD))
+                    .make(); // TODO: add ADs?
+            final List<Genotype> genotypes = new ArrayList<>();
+            genotypes.add(tumorGenotype);
+
+            // We assume that the genotype in the normal is 0/0
+            // TODO: is normal always homozygous reference?
+            final List<Allele> homRefAllelesforNormalGenotype = Collections.nCopies(2, mergedVC.getReference());
+
+            // if we are calling with a normal, build the genotype for the sample to appear in vcf
+            if (hasNormal) {
+                final PerAlleleCollection<Integer> normalCounts = getRefAltCount(mergedVC, normalPRALM, false);
+                final int normalRefAlleleDepth = normalCounts.getRef();
+                final int normalAltAlleleDepth = normalCounts.getAlt(alleleWithHighestTumorLOD);
+                final int[] normalAlleleDepths = { normalRefAlleleDepth, normalAltAlleleDepth };
+                final double normalAlleleFraction = (double) normalAltAlleleDepth / ( normalRefAlleleDepth + normalAltAlleleDepth);
+
+                final Genotype normalGenotype = new GenotypeBuilder(matchedNormalSampleName, homRefAllelesforNormalGenotype)
+                        .AD(normalAlleleDepths)
+                        .attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, normalAlleleFraction)
+                        .make();
+                genotypes.add(normalGenotype);
+            }
+
+            //only use alleles found in the tumor (
+            final VariantContext call = new VariantContextBuilder(callVcb).alleles(tumorAlleles).genotypes(genotypes).make();
+
+            // how should we be making use of _perSampleFilteredReadList_?
+            readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
+                    genomeLocParser, false, alleleMapper, readAlleleLikelihoods, call);
+
+            final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser, genomeLocParser.createGenomeLoc(mergedVC.getChr(), mergedVC.getStart(), mergedVC.getEnd()), refLoc, ref);
+            VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker, readAlleleLikelihoods, call, false);
+
+            if( call.getAlleles().size() != mergedVC.getAlleles().size() )
+                annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
+
+            // maintain the set of all called haplotypes
+            call.getAlleles().stream().map(alleleMapper::get).filter(Objects::nonNull).forEach(calledHaplotypes::addAll);
+            returnCalls.add( annotatedCall );
        }

        // TODO: understand effect of enabling this for somatic calling...
-        final List<VariantContext> phasedCalls = doPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
-         return new CalledHaplotypes(phasedCalls, calledHaplotypes);
-        //return new CalledHaplotypes(returnCalls, calledHaplotypes);
+        final List<VariantContext> outputCalls = doPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
+        return new CalledHaplotypes(outputCalls, calledHaplotypes);
    }

-    private void verifySamplePresence(String sampleName, List<String> samples) {
+    private void verifySamplePresence(final String sampleName, final List<String> samples) {
        if (!samples.contains(sampleName)) {
            throw new IllegalArgumentException("Unable to find sample name "+sampleName+"in sample list of " + StringUtil.join(",", samples));
        }
@ -459,7 +385,7 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
     * @param alleleFractions                       allele fraction(s) for alternate allele(s)
     *
     * @return                                      genotype likelihoods for homRef and het for each alternate allele
-    */
+     */
    private PerAlleleCollection<Double> getHetGenotypeLogLikelihoods(final VariantContext mergedVC,
                                                                     final PerReadAlleleLikelihoodMap tumorPRALM,
                                                                     final Map<String, Integer> originalNormalMQs,
@ -470,13 +396,11 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
        }

        final PerAlleleCollection<MutableDouble> genotypeLogLikelihoods = PerAlleleCollection.createPerRefAndAltAlleleCollection();
-        for (final Allele allele : mergedVC.getAlleles()){
-            genotypeLogLikelihoods.set(allele, new MutableDouble(0.0));
-        }
+        mergedVC.getAlleles().forEach(a -> genotypeLogLikelihoods.set(a, new MutableDouble(0)));

        final Allele refAllele = mergedVC.getReference();
        for(Map.Entry<GATKSAMRecord,Map<Allele, Double>> readAlleleLikelihoodMap : tumorPRALM.getLikelihoodReadMap().entrySet()) {
-            Map<Allele, Double> alleleLikelihoodMap = readAlleleLikelihoodMap.getValue();
+            final Map<Allele, Double> alleleLikelihoodMap = readAlleleLikelihoodMap.getValue();
            if (originalNormalMQs.get(readAlleleLikelihoodMap.getKey().getReadName()) == 0) {
                continue;
            }
@ -484,9 +408,9 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
            final double readRefLogLikelihood = alleleLikelihoodMap.get(refAllele);
            genotypeLogLikelihoods.getRef().add(readRefLogLikelihood);

-            for (Allele altAllele : alleleFractions.getAltAlleles()) {
-                double readAltLogLikelihood = alleleLikelihoodMap.get(altAllele);
-                double adjustedReadAltLL = Math.log10(
+            for (final Allele altAllele : alleleFractions.getAltAlleles()) {
+                final double readAltLogLikelihood = alleleLikelihoodMap.get(altAllele);
+                final double adjustedReadAltLL = Math.log10(
                        Math.pow(10, readRefLogLikelihood) * (1 - alleleFractions.getAlt(altAllele)) +
                                Math.pow(10, readAltLogLikelihood) * alleleFractions.getAlt(altAllele)
                );
@ -515,9 +439,9 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
        final PerAlleleCollection<Integer> alleleCounts = getRefAltCount(vc, pralm, oneStrandOnly);
        final PerAlleleCollection<Double> alleleFractions = PerAlleleCollection.createPerAltAlleleCollection();

-        int refCount = alleleCounts.getRef();
+        final int refCount = alleleCounts.getRef();
        for ( final Allele altAllele : vc.getAlternateAlleles() ) {
-            int altCount = alleleCounts.getAlt(altAllele);
+            final int altCount = alleleCounts.getAlt(altAllele);
            double alleleFraction = (double) altCount / (refCount + altCount);
            // weird case, but I've seen it happen in one strand cases
            if (refCount == 0 && altCount == refCount ) {
@ -552,16 +476,12 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {


        final PerAlleleCollection<MutableInt> alleleCounts = PerAlleleCollection.createPerRefAndAltAlleleCollection();
-
-        // initialize the allele counts to 0
-        for (final Allele allele : vcAlleles) {
-            alleleCounts.set(allele, new MutableInt(0));
-        }
+        vcAlleles.stream().forEach(a -> alleleCounts.set(a, new MutableInt(0)));

        for (final Map.Entry<GATKSAMRecord, Map<Allele, Double>> readAlleleLikelihoodMap : pralm.getLikelihoodReadMap().entrySet()) {
            final GATKSAMRecord read = readAlleleLikelihoodMap.getKey();
            final Map<Allele, Double> alleleLikelihoodMap = readAlleleLikelihoodMap.getValue();
-            MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(alleleLikelihoodMap, vcAlleles);
+            final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(alleleLikelihoodMap, vcAlleles);

            if (read.getMappingQuality() > 0 && mostLikelyAllele.isInformative()) {
                alleleCounts.get(mostLikelyAllele.getMostLikelyAllele()).increment();
@ -581,18 +501,16 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
        }
    }

-    private void filterPRALMForOverlappingReads(PerReadAlleleLikelihoodMap pralm, Allele ref, int location, boolean retainMismatches) {
-
-        Map<GATKSAMRecord, Map<Allele, Double>> m = pralm.getLikelihoodReadMap();
-
+    private void filterPRALMForOverlappingReads(final PerReadAlleleLikelihoodMap pralm, final Allele ref, final int location, final boolean retainMismatches) {
+        final Map<GATKSAMRecord, Map<Allele, Double>> m = pralm.getLikelihoodReadMap();

        // iterate through the reads, if the name has been seen before we have overlapping (potentially) fragments, so handle them
-        Map<String, GATKSAMRecord> nameToRead = new HashMap<>();
-        Set<GATKSAMRecord> readsToKeep = new HashSet<>();
+        final Map<String, GATKSAMRecord> nameToRead = new HashMap<>();
+        final Set<GATKSAMRecord> readsToKeep = new HashSet<>();

-        for(GATKSAMRecord rec : m.keySet()) {
+        for(final GATKSAMRecord rec : m.keySet()) {
            // if we haven't seen it... just record the name and add it to the list of reads to keep
-            GATKSAMRecord existing = nameToRead.get(rec.getReadName());
+            final GATKSAMRecord existing = nameToRead.get(rec.getReadName());
            if (existing == null) {
                nameToRead.put(rec.getReadName(), rec);
                readsToKeep.add(rec);
@ -604,11 +522,11 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
                // TODO: CHECK IF THE READS BOTH OVERLAP THE POSITION!!!!
                if ( ReadUtils.isInsideRead(existing, location) && ReadUtils.isInsideRead(rec, location) ) {

-                    MostLikelyAllele existingMLA = pralm.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(existing));
-                    Allele existingAllele = existingMLA.getMostLikelyAllele();
+                    final MostLikelyAllele existingMLA = PerReadAlleleLikelihoodMap.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(existing));
+                    final Allele existingAllele = existingMLA.getMostLikelyAllele();

-                    MostLikelyAllele recMLA = pralm.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(rec));
-                    Allele recAllele = recMLA.getMostLikelyAllele();
+                    final MostLikelyAllele recMLA = PerReadAlleleLikelihoodMap.getMostLikelyAllele(pralm.getLikelihoodReadMap().get(rec));
+                    final Allele recAllele = recMLA.getMostLikelyAllele();

                    // if the reads disagree at this position...
                    if (!existingAllele.equals(recAllele)) {
@ -654,7 +572,7 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
    }

    private void splitPRALMintoForwardAndReverseReads(final PerReadAlleleLikelihoodMap originalPRALM, final PerReadAlleleLikelihoodMap forwardPRALM, final PerReadAlleleLikelihoodMap reversePRALM) {
-        Map<GATKSAMRecord, Map<Allele, Double>> origReadAlleleLikelihoodMap = originalPRALM.getLikelihoodReadMap();
+        final Map<GATKSAMRecord, Map<Allele, Double>> origReadAlleleLikelihoodMap = originalPRALM.getLikelihoodReadMap();
        for (final GATKSAMRecord read : origReadAlleleLikelihoodMap.keySet()) {
            if (read.isStrandless())
                continue;
@ -669,17 +587,4 @@ public class SomaticGenotypingEngine extends HaplotypeCallerGenotypingEngine {
            }
        }
    }
-
-
-    // Move to utility class so we can use one shared with HaplotypeCallerGenotypingEngine
-    private VariantContext addNonRefSymbolicAllele(final VariantContext mergedVC) {
-        final VariantContextBuilder vcb = new VariantContextBuilder(mergedVC);
-        final List<Allele> originalList = mergedVC.getAlleles();
-        final List<Allele> alleleList = new ArrayList<>(originalList.size() + 1);
-        alleleList.addAll(mergedVC.getAlleles());
-        alleleList.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
-        vcb.alleles(alleleList);
-        return vcb.make();
-    }
-
 }
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/TumorPowerCalculator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/TumorPowerCalculator.java
@ -55,6 +55,7 @@ import org.apache.commons.math.MathException;
 import org.apache.commons.math.distribution.BinomialDistribution;
 import org.apache.commons.math.distribution.BinomialDistributionImpl;
 import org.apache.commons.math3.util.Pair;
+import org.broadinstitute.gatk.utils.exceptions.GATKException;

 import java.util.Arrays;
 import java.util.HashMap;
@ -70,7 +71,6 @@ public class TumorPowerCalculator {
    private final double tumorLODThreshold;
    private final double contamination;
    private final boolean enableSmoothing;
-    public static int numCacheHits = 0;

    private final HashMap<PowerCacheKey, Double> cache = new HashMap<PowerCacheKey, Double>();

@ -134,16 +134,18 @@ public class TumorPowerCalculator {
     * @throws MathException
     *
     */
-    public double cachedPowerCalculation(final int numReads, final double alleleFraction) throws MathException {
+    public double cachedPowerCalculation(final int numReads, final double alleleFraction) {
        PowerCacheKey key = new PowerCacheKey(numReads, alleleFraction);
        // we first look up if power for given number of read and allele fraction has already been computed and stored in the cache.
        // if not we compute it and store it in teh cache.
        Double power = cache.get(key);
        if (power == null) {
-            power = calculatePower(numReads, alleleFraction);
+            try {
+                power = calculatePower(numReads, alleleFraction);
+            } catch (final Exception ex) {
+                throw new GATKException("Power calculation failed", ex);
+            }
            cache.put(key, power);
-        } else {
-            numCacheHits++;
        }
        return power;
    }