New implementation of the GGA mode in the HaplotypeCaller

-- We now inject the given alleles into the reference haplotype and add them to the graph. -- Those paths are read off of the graph and then evaluated with the appropriate marginalization for GGA mode. -- This unifies how Smith-Waterman is performed between discovery and GGA modes. -- Misc minor cleanup in several places.
2013-05-22 10:35:19 -04:00 · 2013-05-22 10:35:19 -04:00 · b5b9d745a7
parent 50b4c130ca
commit b5b9d745a7
16 changed files with 135 additions and 269 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java
@ -77,6 +77,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
    private final static int NUM_PATHS_PER_GRAPH = 25;
    private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
    private static final int GRAPH_KMER_STEP = 6;
+    private static final int GGA_MODE_ARTIFICIAL_COUNTS = 1000;

    private final int minKmer;
    private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms;
@ -92,8 +93,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
    }

    @Override
-    protected List<SeqGraph> assemble(final List<GATKSAMRecord> reads, final Haplotype refHaplotype) {
-        final List<SeqGraph> graphs = new LinkedList<SeqGraph>();
+    protected List<SeqGraph> assemble(final List<GATKSAMRecord> reads, final Haplotype refHaplotype, final List<Haplotype> activeAlleleHaplotypes ) {
+        final List<SeqGraph> graphs = new LinkedList<>();

        final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1;
        if( maxKmer < minKmer) {
@ -106,7 +107,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
                continue;

            if ( debug ) logger.info("Creating de Bruijn graph for " + kmer + " kmer using " + reads.size() + " reads");
-            DeBruijnGraph graph = createGraphFromSequences( reads, kmer, refHaplotype);
+            DeBruijnGraph graph = createGraphFromSequences(reads, kmer, refHaplotype, activeAlleleHaplotypes);
            if( graph != null ) { // graphs that fail during creation ( for example, because there are cycles in the reference graph ) will show up here as a null graph object
                // do a series of steps to clean up the raw assembly graph to make it analysis-ready
                if ( debugGraphTransformations ) graph.printGraph(new File("unpruned.dot"), pruneFactor);
@ -133,7 +134,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
    }

    @Requires({"reads != null", "kmerLength > 0", "refHaplotype != null"})
-    protected DeBruijnGraph createGraphFromSequences( final List<GATKSAMRecord> reads, final int kmerLength, final Haplotype refHaplotype ) {
+    protected DeBruijnGraph createGraphFromSequences( final List<GATKSAMRecord> reads, final int kmerLength, final Haplotype refHaplotype, final List<Haplotype> activeAlleleHaplotypes ) {
        final DeBruijnGraph graph = new DeBruijnGraph(kmerLength);
        final DeBruijnGraphBuilder builder = new DeBruijnGraphBuilder(graph);

@ -142,8 +143,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
            // something went wrong, so abort right now with a null graph
            return null;

-        // now go through the graph already seeded with the reference sequence and add the read kmers to it
-        if ( ! addReadKmersToGraph(builder, reads) )
+        // now go through the graph already seeded with the reference sequence and add the read kmers to it as well as the artificial GGA haplotypes
+        if ( ! addReadKmersToGraph(builder, reads, activeAlleleHaplotypes) )
            // some problem was detected adding the reads to the graph, return null to indicate we failed
            return null;

@ -156,11 +157,20 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
     *
     * @param builder a debruijn graph builder to add the read kmers to
     * @param reads a non-null list of reads whose kmers we want to add to the graph
+     * @param activeAlleleHaplotypes a list of haplotypes to add to the graph for GGA mode
     * @return true if we successfully added the read kmers to the graph without corrupting it in some way
     */
-    protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List<GATKSAMRecord> reads) {
+    protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List<GATKSAMRecord> reads, final List<Haplotype> activeAlleleHaplotypes) {
        final int kmerLength = builder.getKmerSize();

+        // First pull kmers out of the artificial GGA haplotypes and throw them on the graph
+        for( final Haplotype haplotype : activeAlleleHaplotypes ) {
+            final int end = haplotype.length() - kmerLength;
+            for( int start = 0; start < end; start++ ) {
+                builder.addKmerPairFromSeqToGraph( haplotype.getBases(), start, GGA_MODE_ARTIFICIAL_COUNTS );
+            }
+        }
+
        // Next pull kmers out of every read and throw them on the graph
        for( final GATKSAMRecord read : reads ) {
            final byte[] sequence = read.getReadBases();
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@ -71,7 +71,7 @@ public class GenotypingEngine {

    private final boolean DEBUG;
    private final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS;
-    private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
+    private final static List<Allele> noCall = new ArrayList<>(); // used to noCall all genotypes until the exact model is applied
    private final VariantAnnotatorEngine annotationEngine;
    private final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;

@ -162,8 +162,8 @@ public class GenotypingEngine {
        final TreeSet<Integer> startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, haplotypeReadMap, ref, refLoc, activeAllelesToGenotype);

        // Walk along each position in the key set and create each event to be outputted
-        final Set<Haplotype> calledHaplotypes = new HashSet<Haplotype>();
-        final List<VariantContext> returnCalls = new ArrayList<VariantContext>();
+        final Set<Haplotype> calledHaplotypes = new HashSet<>();
+        final List<VariantContext> returnCalls = new ArrayList<>();
        for( final int loc : startPosKeySet ) {
            if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
                final List<VariantContext> eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype);
@ -183,7 +183,7 @@ public class GenotypingEngine {
                if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
                    throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles.");
                }
-                final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<VariantContext, Allele>();
+                final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
                mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
                for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
                    mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
@ -244,7 +244,7 @@ public class GenotypingEngine {

        if ( in_GGA_mode ) startPosKeySet.clear();

-        cleanUpSymbolicUnassembledEvents( haplotypes );
+        //cleanUpSymbolicUnassembledEvents( haplotypes ); // We don't make symbolic alleles so this isn't needed currently
        if ( !in_GGA_mode ) {
            // run the event merger if we're not in GGA mode
            final boolean mergedAnything = crossHaplotypeEventMerger.merge(haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc);
@ -267,7 +267,7 @@ public class GenotypingEngine {
     * @return the list of the sources of vcs in the same order
     */
    private List<String> makePriorityList(final List<VariantContext> vcs) {
-        final List<String> priorityList = new LinkedList<String>();
+        final List<String> priorityList = new LinkedList<>();
        for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource());
        return priorityList;
    }
@ -276,7 +276,7 @@ public class GenotypingEngine {
                                                      final int loc,
                                                      final List<VariantContext> activeAllelesToGenotype) {
        // the overlapping events to merge into a common reference view
-        final List<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
+        final List<VariantContext> eventsAtThisLoc = new ArrayList<>();

        if( activeAllelesToGenotype.isEmpty() ) {
            for( final Haplotype h : haplotypes ) {
@ -292,7 +292,7 @@ public class GenotypingEngine {
                if( compVC.getStart() == loc ) {
                    int alleleCount = 0;
                    for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
-                        List<Allele> alleleSet = new ArrayList<Allele>(2);
+                        List<Allele> alleleSet = new ArrayList<>(2);
                        alleleSet.add(compVC.getReference());
                        alleleSet.add(compAltAllele);
                        final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount;
@ -348,7 +348,7 @@ public class GenotypingEngine {
                                                                                         final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
                                                                                         final VariantContext call ) {

-        final Map<String, PerReadAlleleLikelihoodMap> returnMap = new LinkedHashMap<String, PerReadAlleleLikelihoodMap>();
+        final Map<String, PerReadAlleleLikelihoodMap> returnMap = new LinkedHashMap<>();
        final GenomeLoc callLoc = parser.createGenomeLoc(call);
        for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) {
            final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
@ -384,7 +384,7 @@ public class GenotypingEngine {
    // TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
    @Requires("haplotypes != null")
    protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
-        final List<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
+        final List<Haplotype> haplotypesToRemove = new ArrayList<>();
        for( final Haplotype h : haplotypes ) {
            for( final VariantContext vc : h.getEventMap().getVariantContexts() ) {
                if( vc.isSymbolic() ) {
@ -407,7 +407,7 @@ public class GenotypingEngine {
                                                                                              final Map<Allele, List<Haplotype>> alleleMapper,
                                                                                              final double downsamplingFraction ) {

-        final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new LinkedHashMap<String, PerReadAlleleLikelihoodMap>();
+        final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new LinkedHashMap<>();
        for( final Map.Entry<String, PerReadAlleleLikelihoodMap> haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample
            final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
            for( final Map.Entry<Allele, List<Haplotype>> alleleMapperEntry : alleleMapper.entrySet() ) { // for each output allele
@ -430,7 +430,7 @@ public class GenotypingEngine {
    }

    protected static Map<Allele, List<Haplotype>> createAlleleMapper( final Map<VariantContext, Allele> mergeMap, final Map<Event, List<Haplotype>> eventMap ) {
-        final Map<Allele, List<Haplotype>> alleleMapper = new LinkedHashMap<Allele, List<Haplotype>>();
+        final Map<Allele, List<Haplotype>> alleleMapper = new LinkedHashMap<>();
        for( final Map.Entry<VariantContext, Allele> entry : mergeMap.entrySet() ) {
            alleleMapper.put(entry.getValue(), eventMap.get(new Event(entry.getKey())));
        }
@ -441,100 +441,33 @@ public class GenotypingEngine {
    @Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
    protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {

-        final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<Event, List<Haplotype>>(eventsAtThisLoc.size()+1);
-        VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis
-        eventMapper.put(new Event(null), new ArrayList<Haplotype>());
+        final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1);
+        final Event refEvent = new Event(null);
+        eventMapper.put(refEvent, new ArrayList<Haplotype>());
        for( final VariantContext vc : eventsAtThisLoc ) {
            eventMapper.put(new Event(vc), new ArrayList<Haplotype>());
        }

-        final List<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
        for( final Haplotype h : haplotypes ) {
-            if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) {
-                final List<Allele> alleles = new ArrayList<Allele>(2);
-                alleles.add(h.getArtificialRefAllele());
-                alleles.add(h.getArtificialAltAllele());
-                final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype")
-                        .alleles(alleles)
-                        .loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + h.getArtificialRefAllele().length() - 1).make() );
-                if( eventMapper.containsKey(artificialVC) ) {
-                    eventMapper.get(artificialVC).add(h);
-                }
-            } else if( h.getEventMap().get(loc) == null ) { // no event at this location so let's investigate later
-                undeterminedHaplotypes.add(h);
+            if( h.getEventMap().get(loc) == null ) {
+                eventMapper.get(refEvent).add(h);
            } else {
-                boolean haplotypeIsDetermined = false;
                for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
                    if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
                        eventMapper.get(new Event(vcAtThisLoc)).add(h);
-                        haplotypeIsDetermined = true;
                        break;
                    }
                }
-
-                if( !haplotypeIsDetermined )
-                    undeterminedHaplotypes.add(h);
            }
        }

-        for( final Haplotype h : undeterminedHaplotypes ) {
-            Event matchingEvent = new Event(null);
-            for( final Map.Entry<Event, List<Haplotype>> eventToTest : eventMapper.entrySet() ) {
-                // don't test against the reference allele
-                if( eventToTest.getKey().equals(new Event(null)) )
-                    continue;
-
-                // only try to disambiguate for alleles that have had haplotypes previously assigned above
-                if( eventToTest.getValue().isEmpty() )
-                    continue;
-
-                final Haplotype artificialHaplotype = eventToTest.getValue().get(0);
-                if( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap(), true) ) {
-                    matchingEvent = eventToTest.getKey();
-                    break;
-                }
-            }
-
-            eventMapper.get(matchingEvent).add(h);
-        }
-
        return eventMapper;
    }

-    protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset, final boolean resolveSupersetToSubset) {
-
-        for ( final Map.Entry<Integer, VariantContext> fromSubset : subset.entrySet() ) {
-            final VariantContext fromSuperset = superset.get(fromSubset.getKey());
-            if ( fromSuperset == null )
-                return false;
-
-            List<Allele> supersetAlleles = fromSuperset.getAlternateAlleles();
-            if ( resolveSupersetToSubset )
-                supersetAlleles = resolveAlternateAlleles(fromSubset.getValue().getReference(), fromSuperset.getReference(), supersetAlleles);
-
-            if ( !supersetAlleles.contains(fromSubset.getValue().getAlternateAllele(0)) )
-                return false;
-        }
-
-        return true;
-    }
-
-    private static List<Allele> resolveAlternateAlleles(final Allele targetReference, final Allele actualReference, final List<Allele> currentAlleles) {
-        if ( targetReference.length() <= actualReference.length() )
-            return currentAlleles;
-
-        final List<Allele> newAlleles = new ArrayList<Allele>(currentAlleles.size());
-        final byte[] extraBases = Arrays.copyOfRange(targetReference.getBases(), actualReference.length(), targetReference.length());
-        for ( final Allele a : currentAlleles ) {
-            newAlleles.add(Allele.extend(a, extraBases));
-        }
-        return newAlleles;
-    }
-
    @Ensures({"result.size() == haplotypeAllelesForSample.size()"})
    protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final List<List<Haplotype>> alleleMapper, final List<Haplotype> haplotypes ) {
        if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
-        final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
+        final List<Allele> eventAllelesForSample = new ArrayList<>();
        for( final Allele a : haplotypeAllelesForSample ) {
            final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a));
            for( int iii = 0; iii < alleleMapper.size(); iii++ ) {
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@ -47,6 +47,9 @@
 package org.broadinstitute.sting.gatk.walkers.haplotypecaller;

 import com.google.java.contract.Ensures;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
 import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
@ -433,8 +436,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
    private final static int MIN_READ_LENGTH = 10;

    private List<String> samplesList = new ArrayList<String>();
-    private final static double LOG_ONE_HALF = -Math.log10(2.0);
-    private final static double LOG_ONE_THIRD = -Math.log10(3.0);
    private final List<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();

    private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
@ -603,7 +604,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
            // if we don't have any data, just abort early
            return new ActivityProfileState(ref.getLocus(), 0.0);

-        final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
+        final List<Allele> noCall = new ArrayList<>(); // used to noCall all genotypes until the exact model is applied
        noCall.add(Allele.NO_CALL);

        final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
@ -625,14 +626,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
                        }
                    }
                    genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
-                    genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
-                    genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
+                    genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF );
+                    genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD;
                }
            }
            genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
        }

-        final List<Allele> alleles = new ArrayList<Allele>();
+        final List<Allele> alleles = new ArrayList<>();
        alleles.add( FAKE_REF_ALLELE );
        alleles.add( FAKE_ALT_ALLELE );
        final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
@ -746,9 +747,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        // Create the reference haplotype which is the bases from the reference that make up the active region
        finalizeActiveRegion(activeRegion); // merge overlapping fragments, clip adapter and low qual tails

-        final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true);
        final byte[] fullReferenceWithPadding = activeRegion.getActiveRegionReference(referenceReader, REFERENCE_PADDING);
        final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion);
+        final Haplotype referenceHaplotype = createReferenceHaplotype(activeRegion, paddedReferenceLoc);

        final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype );

@ -760,6 +761,21 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        }
    }

+    /**
+     * Helper function to create the reference haplotype out of the active region and a padded loc
+     * @param activeRegion the active region from which to generate the reference haplotype
+     * @param paddedReferenceLoc the GenomeLoc which includes padding and shows how big the reference haplotype should be
+     * @return a non-null haplotype
+     */
+    private Haplotype createReferenceHaplotype(final ActiveRegion activeRegion, final GenomeLoc paddedReferenceLoc) {
+        final Haplotype refHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true);
+        refHaplotype.setAlignmentStartHapwrtRef(activeRegion.getExtendedLoc().getStart() - paddedReferenceLoc.getStart());
+        final Cigar c = new Cigar();
+        c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
+        refHaplotype.setCigar(c);
+        return refHaplotype;
+    }
+
    /**
     * Trim down the active region to just enough to properly genotype the events among the haplotypes
     *
@ -791,7 +807,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        }

        // trim down the haplotypes
-        final Set<Haplotype> haplotypeSet = new HashSet<Haplotype>(haplotypes.size());
+        final Set<Haplotype> haplotypeSet = new HashSet<>(haplotypes.size());
        for ( final Haplotype h : haplotypes ) {
            final Haplotype trimmed = h.trim(trimmedActiveRegion.getExtendedLoc());
            if ( trimmed != null ) {
@ -802,7 +818,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        }

        // create the final list of trimmed haplotypes
-        final List<Haplotype> trimmedHaplotypes = new ArrayList<Haplotype>(haplotypeSet);
+        final List<Haplotype> trimmedHaplotypes = new ArrayList<>(haplotypeSet);

        // sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM
        Collections.sort( trimmedHaplotypes, new HaplotypeBaseComparator() );
@ -816,7 +832,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In


        // trim down the reads and add them to the trimmed active region
-        final List<GATKSAMRecord> trimmedReads = new ArrayList<GATKSAMRecord>(originalActiveRegion.getReads().size());
+        final List<GATKSAMRecord> trimmedReads = new ArrayList<>(originalActiveRegion.getReads().size());
        for( final GATKSAMRecord read : originalActiveRegion.getReads() ) {
            final GATKSAMRecord clippedRead = ReadClipper.hardClipToRegion( read, trimmedActiveRegion.getExtendedLoc().getStart(), trimmedActiveRegion.getExtendedLoc().getStop() );
            if( trimmedActiveRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) {
@ -937,7 +953,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
    }

    private Map<String, List<GATKSAMRecord>> splitReadsBySample( final Collection<GATKSAMRecord> reads ) {
-        final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<String, List<GATKSAMRecord>>();
+        final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<>();
        for( final String sample : samplesList) {
            List<GATKSAMRecord> readList = returnMap.get( sample );
            if( readList == null ) {
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@ -74,7 +74,6 @@ import java.util.*;
 public class LikelihoodCalculationEngine {
    private final static Logger logger = Logger.getLogger(LikelihoodCalculationEngine.class);

-    private static final double LOG_ONE_HALF = -Math.log10(2.0);
    private final byte constantGCP;
    private final double log10globalReadMismappingRate;
    private final boolean DEBUG;
@ -299,7 +298,7 @@ public class LikelihoodCalculationEngine {
                        // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
                        // First term is approximated by Jacobian log with table lookup.
                        haplotypeLikelihood += ReadUtils.getMeanRepresentativeReadCount( entry.getKey() ) *
-                                ( MathUtils.approximateLog10SumLog10(entry.getValue().get(iii_allele), entry.getValue().get(jjj_allele)) + LOG_ONE_HALF );
+                                ( MathUtils.approximateLog10SumLog10(entry.getValue().get(iii_allele), entry.getValue().get(jjj_allele)) + MathUtils.LOG_ONE_HALF );
                    }
                }
                haplotypeLikelihoodMatrix[iii][jjj] = haplotypeLikelihood;
@ -397,11 +396,11 @@ public class LikelihoodCalculationEngine {
        if ( haplotypes.size() == 2 ) return haplotypes; // fast path -- we'll always want to use 2 haplotypes

        // all of the haplotypes that at least one sample called as one of the most likely
-        final Set<Haplotype> selectedHaplotypes = new HashSet<Haplotype>();
+        final Set<Haplotype> selectedHaplotypes = new HashSet<>();
        selectedHaplotypes.add(findReferenceHaplotype(haplotypes)); // ref is always one of the selected

        // our annoying map from allele -> haplotype
-        final Map<Allele, Haplotype> allele2Haplotype = new HashMap<Allele, Haplotype>();
+        final Map<Allele, Haplotype> allele2Haplotype = new HashMap<>();
        for ( final Haplotype h : haplotypes ) {
            h.setScore(h.isReference() ? Double.MAX_VALUE : 0.0); // set all of the scores to 0 (lowest value) for all non-ref haplotypes
            allele2Haplotype.put(Allele.create(h, h.isReference()), h);
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
@ -111,7 +111,11 @@ public abstract class LocalAssemblyEngine {
     * @param refHaplotype the reference haplotype
     * @return a non-null list of reads
     */
-    protected abstract List<SeqGraph> assemble(List<GATKSAMRecord> reads, Haplotype refHaplotype);
+    protected abstract List<SeqGraph> assemble(List<GATKSAMRecord> reads, Haplotype refHaplotype, List<Haplotype> activeAlleleHaplotypes);
+
+    protected List<SeqGraph> assemble(List<GATKSAMRecord> reads, Haplotype refHaplotype) {
+        return assemble(reads, refHaplotype, Collections.<Haplotype>emptyList());
+    }

    /**
     * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
@ -128,8 +132,11 @@ public abstract class LocalAssemblyEngine {
        if( fullReferenceWithPadding.length != refLoc.size() ) { throw new IllegalArgumentException("Reference bases and reference loc must be the same size."); }
        if( pruneFactor < 0 ) { throw new IllegalArgumentException("Pruning factor cannot be negative"); }

+        // create the list of artificial haplotypes that should be added to the graph for GGA mode
+        final List<Haplotype> activeAlleleHaplotypes = createActiveAlleleHaplotypes(refHaplotype, activeAllelesToGenotype, activeRegion.getExtendedLoc());
+
        // create the graphs by calling our subclass assemble method
-        final List<SeqGraph> graphs = assemble(activeRegion.getReads(), refHaplotype);
+        final List<SeqGraph> graphs = assemble(activeRegion.getReads(), refHaplotype, activeAlleleHaplotypes);

        // do some QC on the graphs
        for ( final SeqGraph graph : graphs ) { sanityCheckGraph(graph, refHaplotype); }
@ -138,45 +145,53 @@ public abstract class LocalAssemblyEngine {
        if ( graphWriter != null ) { printGraphs(graphs); }

        // find the best paths in the graphs and return them as haplotypes
-        return findBestPaths( graphs, refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
+        return findBestPaths( graphs, refHaplotype, refLoc, activeRegion.getExtendedLoc() );
    }

-    @Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"})
-    @Ensures({"result.contains(refHaplotype)"})
-    protected List<Haplotype> findBestPaths(final List<SeqGraph> graphs, final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow) {
-        // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
-        final Set<Haplotype> returnHaplotypes = new LinkedHashSet<Haplotype>();
-        refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - refLoc.getStart());
-        final Cigar c = new Cigar();
-        c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
-        refHaplotype.setCigar(c);
-        returnHaplotypes.add( refHaplotype );
-
+    /**
+     * Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype
+     * @param refHaplotype the reference haplotype
+     * @param activeAllelesToGenotype the list of alternate alleles in VariantContexts
+     * @param activeRegionWindow the window containing the reference haplotype
+     * @return a non-null list of haplotypes
+     */
+    private List<Haplotype> createActiveAlleleHaplotypes(final Haplotype refHaplotype, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow) {
+        final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
        final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
-        final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();

-        // for GGA mode, add the desired allele into the haplotype
        for( final VariantContext compVC : activeAllelesToGenotype ) {
            for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
                final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
-                addHaplotypeForGGA( insertedRefHaplotype, refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true );
+                if( insertedRefHaplotype != null ) { // can be null if the requested allele can't be inserted into the haplotype
+                    returnHaplotypes.add(insertedRefHaplotype);
+                }
            }
        }

+        return new ArrayList<>(returnHaplotypes);
+    }
+
+    @Ensures({"result.contains(refHaplotype)"})
+    protected List<Haplotype> findBestPaths(final List<SeqGraph> graphs, final Haplotype refHaplotype, final GenomeLoc refLoc, final GenomeLoc activeRegionWindow) {
+        // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
+        final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
+        returnHaplotypes.add( refHaplotype );
+
+        final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
+
        for( final SeqGraph graph : graphs ) {
            final SeqVertex source = graph.getReferenceSourceVertex();
            final SeqVertex sink = graph.getReferenceSinkVertex();
            if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph);

-            final KBestPaths<SeqVertex,BaseEdge> pathFinder = new KBestPaths<SeqVertex,BaseEdge>(allowCyclesInKmerGraphToGeneratePaths);
+            final KBestPaths<SeqVertex,BaseEdge> pathFinder = new KBestPaths<>(allowCyclesInKmerGraphToGeneratePaths);
            for ( final Path<SeqVertex,BaseEdge> path : pathFinder.getKBestPaths(graph, numBestHaplotypesPerGraph, source, sink) ) {
-//                logger.info("Found path " + path);
                Haplotype h = new Haplotype( path.getBases() );
                if( !returnHaplotypes.contains(h) ) {
                    final Cigar cigar = path.calculateCigar(refHaplotype.getBases());

                    if ( cigar == null ) {
-                        // couldn't produce a meaningful alignment of haplotype to reference, fail quitely
+                        // couldn't produce a meaningful alignment of haplotype to reference, fail quietly
                        continue;
                    } else if( cigar.isEmpty() ) {
                        throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() +
@ -197,25 +212,6 @@ public abstract class LocalAssemblyEngine {

                    if ( debug )
                        logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize());
-
-                    // for GGA mode, add the desired allele into the haplotype if it isn't already present
-                    if( !activeAllelesToGenotype.isEmpty() ) {
-                        final Map<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, refWithPadding, refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
-                        for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
-                            final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
-
-                            // This if statement used to additionally have:
-                            //      "|| !vcOnHaplotype.hasSameAllelesAs(compVC)"
-                            //  but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto
-                            //  a haplotype that already contains a 1bp insertion (so practically it is reference but
-                            //  falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
-                            if( vcOnHaplotype == null ) {
-                                for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
-                                    addHaplotypeForGGA( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false );
-                                }
-                            }
-                        }
-                    }
                }
            }
        }
@ -238,7 +234,7 @@ public abstract class LocalAssemblyEngine {
            }
        }

-        return new ArrayList<Haplotype>(returnHaplotypes);
+        return new ArrayList<>(returnHaplotypes);
    }

    /**
@ -256,71 +252,6 @@ public abstract class LocalAssemblyEngine {
        return false;
    }

-    /**
-     * Take a haplotype which was generated by injecting an allele into a string of bases and run SW against the reference to determine the variants on the haplotype.
-     * Unfortunately since this haplotype didn't come from the assembly graph you can't straightforwardly use the bubble traversal algorithm to get this information.
-     * This is a target for future work as we rewrite the HaplotypeCaller to be more bubble-caller based.
-     * @param haplotype                     the candidate haplotype
-     * @param ref                           the reference bases to align against
-     * @param haplotypeList                 the current list of haplotypes
-     * @param activeRegionStart             the start of the active region in the reference byte array
-     * @param activeRegionStop              the stop of the active region in the reference byte array
-     * @param FORCE_INCLUSION_FOR_GGA_MODE  if true will include in the list even if it already exists
-     * @return                              true if the candidate haplotype was successfully incorporated into the haplotype list
-     */
-    @Requires({"ref != null", "ref.length >= activeRegionStop - activeRegionStart"})
-    private boolean addHaplotypeForGGA( final Haplotype haplotype, final byte[] ref, final Set<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
-        if( haplotype == null ) { return false; }
-
-        final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SWParameterSet.STANDARD_NGS );
-        haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
-
-        if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 || swConsensus.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments
-            return false;
-        }
-
-        haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0, true) );
-
-        final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate(haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true);
-        int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true );
-        if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) {
-            hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal
-        }
-        byte[] newHaplotypeBases;
-        // extend partial haplotypes to contain the full active region sequence
-        if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
-            newHaplotypeBases = ArrayUtils.addAll(ArrayUtils.addAll(ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()),
-                    haplotype.getBases()),
-                    ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop));
-        } else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
-            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) );
-        } else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
-            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
-        } else {
-            newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop);
-        }
-
-        final Haplotype h = new Haplotype( newHaplotypeBases );
-        final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SWParameterSet.STANDARD_NGS );
-
-        h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
-        if ( haplotype.isArtificialHaplotype() ) {
-            h.setArtificialEvent(haplotype.getArtificialEvent());
-        }
-        if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart || swConsensus2.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments
-            return false;
-        }
-
-        h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0, true) );
-
-        if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) {
-            haplotypeList.add(h);
-            return true;
-        } else {
-            return false;
-        }
-    }
-
    protected SeqGraph cleanupSeqGraph(final SeqGraph seqGraph) {
        if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor);

@ -372,7 +303,6 @@ public abstract class LocalAssemblyEngine {
     * Perform general QC on the graph to make sure something hasn't gone wrong during assembly
     * @param graph the graph to check
     * @param refHaplotype the reference haplotype
-     * @param <T>
     */
    private <T extends BaseVertex, E extends BaseEdge> void sanityCheckGraph(final BaseGraph<T,E> graph, final Haplotype refHaplotype) {
        sanityCheckReferenceGraph(graph, refHaplotype);
@ -383,7 +313,6 @@ public abstract class LocalAssemblyEngine {
     *
     * @param graph the graph to check
     * @param refHaplotype the reference haplotype
-     * @param <T>
     */
    private <T extends BaseVertex, E extends BaseEdge> void sanityCheckReferenceGraph(final BaseGraph<T,E> graph, final Haplotype refHaplotype) {
        if( graph.getReferenceSourceVertex() == null ) {
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java
@ -62,6 +62,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
    private final static Logger logger = Logger.getLogger(ReadThreadingAssembler.class);

    private final static int DEFAULT_NUM_PATHS_PER_GRAPH = 128;
+    private final static int GGA_MODE_ARTIFICIAL_COUNTS = 1000;

    /** The min and max kmer sizes to try when building the graph. */
    private final List<Integer> kmerSizes;
@ -88,7 +89,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
    }

    @Override
-    public List<SeqGraph> assemble( final List<GATKSAMRecord> reads, final Haplotype refHaplotype) {
+    public List<SeqGraph> assemble( final List<GATKSAMRecord> reads, final Haplotype refHaplotype, final List<Haplotype> activeAlleleHaplotypes ) {
        final List<SeqGraph> graphs = new LinkedList<>();

        for ( final int kmerSize : kmerSizes ) {
@ -96,6 +97,12 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {

            // add the reference sequence to the graph
            rtgraph.addSequence("ref", refHaplotype.getBases(), null, true);
+            int hapCount = 0;
+            for( final Haplotype h : activeAlleleHaplotypes ) {
+                final int[] counts = new int[h.length()];
+                Arrays.fill(counts, GGA_MODE_ARTIFICIAL_COUNTS);
+                rtgraph.addSequence("activeAllele" + hapCount++, h.getBases(), counts, false);
+            }

            // Next pull kmers out of every read and throw them on the graph
            for( final GATKSAMRecord read : reads ) {
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
@ -590,11 +590,9 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
    }

    /**
-     * Get the start and stop positions (exclusive) of the longest stretch of high quality bases
-     * in read
+     * Get the longest stretch of high quality bases in read and pass that sequence to the graph
     *
     * @param read a non-null read
-     * @return the start and stop for high quality bases in read, or null if none exist
     */
    protected void addRead(final GATKSAMRecord read) {
        final byte[] sequence = read.getReadBases();
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@ -78,8 +78,6 @@ public class PairHMMIndelErrorModel {
    private static final double baseMatchArray[];
    private static final double baseMismatchArray[];

-    private final static double LOG_ONE_HALF;
-
    private static final int START_HRUN_GAP_IDX = 4;
    private static final int MAX_HRUN_GAP_IDX = 20;

@ -97,8 +95,6 @@ public class PairHMMIndelErrorModel {
    /////////////////////////////

    static {
-        LOG_ONE_HALF= -Math.log10(2.0);
-
        baseMatchArray = new double[MAX_CACHED_QUAL+1];
        baseMismatchArray = new double[MAX_CACHED_QUAL+1];
        for (int k=1; k <= MAX_CACHED_QUAL; k++) {
@ -466,7 +462,7 @@ public class PairHMMIndelErrorModel {
                    final double li = readLikelihoods[readIdx][i];
                    final double lj = readLikelihoods[readIdx][j];
                    final int readCount = readCounts[readIdx];
-                    haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF);
+                    haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + MathUtils.LOG_ONE_HALF);
                }
            }
        }
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java
@ -73,8 +73,8 @@ public class DeBruijnAssemblerUnitTest extends BaseTest {
    public void testReferenceCycleGraph() {
        String refCycle = "ATCGAGGAGAGCGCCCCGAGATATATATATATATATTTGCGAGCGCGAGCGTTTTAAAAATTTTAGACGGAGAGATATATATATATGGGAGAGGGGATATATATATATCCCCCC";
        String noCycle = "ATCGAGGAGAGCGCCCCGAGATATTATTTGCGAGCGCGAGCGTTTTAAAAATTTTAGACGGAGAGATGGGAGAGGGGATATATAATATCCCCCC";
-        final DeBruijnGraph g1 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList<GATKSAMRecord>(), 10, new Haplotype(refCycle.getBytes(), true));
-        final DeBruijnGraph g2 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList<GATKSAMRecord>(), 10, new Haplotype(noCycle.getBytes(), true));
+        final DeBruijnGraph g1 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList<GATKSAMRecord>(), 10, new Haplotype(refCycle.getBytes(), true), Collections.<Haplotype>emptyList());
+        final DeBruijnGraph g2 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList<GATKSAMRecord>(), 10, new Haplotype(noCycle.getBytes(), true), Collections.<Haplotype>emptyList());

        Assert.assertTrue(g1 == null, "Reference cycle graph should return null during creation.");
        Assert.assertTrue(g2 != null, "Reference non-cycle graph should not return null during creation.");
@ -147,7 +147,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest {
            }
        }

-        assembler.addReadKmersToGraph(builder, Arrays.asList(read));
+        assembler.addReadKmersToGraph(builder, Arrays.asList(read), Collections.<Haplotype>emptyList());
        Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size());
        for ( final Kmer addedKmer : builder.addedPairs ) {
            Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases);
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
    @Test
    public void testHaplotypeCallerMultiSampleGGAComplex() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
-                "008029ee34e1becd8312e3c4d608033c");
+                "38b4596c3910fdde51ea59aa1a8f848f");
    }

    @Test
    public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
-                "ae8d95ffe77515cc74a55c2afd142826");
+                "08147870d73d9749ced8cfc7cdd4714f");
    }
 }
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    @Test
    public void testHaplotypeCallerMultiSampleGGA() {
        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
-                "bb30d0761dc9e2dfd57bfe07b72d06d8");
+                "ffd69c410dca0d2f9fe75f3cb5d08179");
    }

    @Test
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java
@ -47,6 +47,9 @@
 package org.broadinstitute.sting.gatk.walkers.haplotypecaller;

 import net.sf.picard.reference.IndexedFastaSequenceFile;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMFileHeader;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
@ -216,6 +219,10 @@ public class LocalAssemblyEngineUnitTest extends BaseTest {

    private List<Haplotype> assemble(final Assembler assembler, final byte[] refBases, final GenomeLoc loc, final List<GATKSAMRecord> reads) {
        final Haplotype refHaplotype = new Haplotype(refBases, true);
+        final Cigar c = new Cigar();
+        c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
+        refHaplotype.setCigar(c);
+
        final ActiveRegion activeRegion = new ActiveRegion(loc, null, true, genomeLocParser, 0);
        activeRegion.addAll(reads);
        final LocalAssemblyEngine engine = createAssembler(assembler);
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java
@ -85,7 +85,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
        public SeqGraph assemble() {
            assembler.removePathsNotConnectedToRef = false; // need to pass some of the tests
            assembler.setDebugGraphTransformations(true);
-            final SeqGraph graph = assembler.assemble(reads, refHaplotype).get(0);
+            final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.<Haplotype>emptyList()).get(0);
            if ( DEBUG ) graph.printGraph(new File("test.dot"), 0);
            return graph;
        }
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@ -55,17 +55,19 @@ public class MathUtils {
    private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
    private static final double MAX_JACOBIAN_TOLERANCE = 8.0;
    private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1;
-    private static final int MAXN = 70000;
+    private static final int MAXN = 70_000;
    private static final int LOG10_CACHE_SIZE = 4 * MAXN;  // we need to be able to go up to 2*(2N) when calculating some of the coefficients

    /**
     * The smallest log10 value we'll emit from normalizeFromLog10 and other functions
     * where the real-space value is 0.0.
     */
-    public final static double LOG10_P_OF_ZERO = -1000000.0;
-    public final static double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5);
-    private final static double NATURAL_LOG_OF_TEN = Math.log(10.0);
-    private final static double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI);
+    public static final double LOG10_P_OF_ZERO = -1000000.0;
+    public static final double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5);
+    public static final double LOG_ONE_HALF = -Math.log10(2.0);
+    public static final double LOG_ONE_THIRD = -Math.log10(3.0);
+    private static final double NATURAL_LOG_OF_TEN = Math.log(10.0);
+    private static final double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI);

    static {
        log10Cache = new double[LOG10_CACHE_SIZE];
--- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
+++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
@ -221,7 +221,7 @@ public class PerReadAlleleLikelihoodMap {
                    final int count = ReadUtils.getMeanRepresentativeReadCount(read);
                    final double likelihood_iii = entry.getValue().get(iii_allele);
                    final double likelihood_jjj = entry.getValue().get(jjj_allele);
-                    haplotypeLikelihood += count * (MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + LOG_ONE_HALF);
+                    haplotypeLikelihood += count * (MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + MathUtils.LOG_ONE_HALF);

                    // fast exit.  If this diploid pair is already worse than the max, just stop and look at the next pair
                    if ( haplotypeLikelihood < maxElement ) break;
@ -241,7 +241,6 @@ public class PerReadAlleleLikelihoodMap {

        return new MostLikelyAllele(alleles.get(hap1), alleles.get(hap2), maxElement, maxElement);
    }
-    private static final double LOG_ONE_HALF = -Math.log10(2.0);

    /**
     * Given a map from alleles to likelihoods, find the allele with the largest likelihood.
--- a/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java
@ -46,7 +46,6 @@ public class Haplotype extends Allele {
    private EventMap eventMap = null;
    private Cigar cigar;
    private int alignmentStartHapwrtRef;
-    private Event artificialEvent = null;
    private double score = 0;

    /**
@ -93,11 +92,6 @@ public class Haplotype extends Allele {
        super(allele, true);
    }

-    protected Haplotype( final byte[] bases, final Event artificialEvent ) {
-        this(bases, false);
-        this.artificialEvent = artificialEvent;
-    }
-
    public Haplotype( final byte[] bases, final GenomeLoc loc ) {
        this(bases, false);
        this.genomeLocation = loc;
@ -189,7 +183,7 @@ public class Haplotype extends Allele {
    }

    /**
-     * Get the cigar for this haplotype.  Note that cigar is guarenteed to be consolidated
+     * Get the cigar for this haplotype.  Note that the cigar is guaranteed to be consolidated
     * in that multiple adjacent equal operates will have been merged
     * @return the cigar of this haplotype
     */
@ -223,30 +217,6 @@ public class Haplotype extends Allele {
            throw new IllegalArgumentException("Read length " + length() + " not equal to the read length of the cigar " + cigar.getReadLength());
    }

-    public boolean isArtificialHaplotype() {
-        return artificialEvent != null;
-    }
-
-    public Event getArtificialEvent() {
-        return artificialEvent;
-    }
-
-    public Allele getArtificialRefAllele() {
-        return artificialEvent.ref;
-    }
-
-    public Allele getArtificialAltAllele() {
-        return artificialEvent.alt;
-    }
-
-    public int getArtificialAllelePosition() {
-        return artificialEvent.pos;
-    }
-
-    public void setArtificialEvent( final Event artificialEvent ) {
-        this.artificialEvent = artificialEvent;
-    }
-
    @Requires({"refInsertLocation >= 0"})
    public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
        // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
@ -260,7 +230,7 @@ public class Haplotype extends Allele {
        newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, 0, haplotypeInsertLocation)); // bases before the variant
        newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
        newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, haplotypeInsertLocation + refAllele.length(), myBases.length)); // bases after the variant
-        return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation));
+        return new Haplotype(newHaplotypeBases);
    }

    public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,