diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 48972dfd5..3c0642f83 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -77,6 +77,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { private final static int NUM_PATHS_PER_GRAPH = 25; private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers private static final int GRAPH_KMER_STEP = 6; + private static final int GGA_MODE_ARTIFICIAL_COUNTS = 1000; private final int minKmer; private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; @@ -92,8 +93,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } @Override - protected List assemble(final List reads, final Haplotype refHaplotype) { - final List graphs = new LinkedList(); + protected List assemble(final List reads, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { + final List graphs = new LinkedList<>(); final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1; if( maxKmer < minKmer) { @@ -106,7 +107,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { continue; if ( debug ) logger.info("Creating de Bruijn graph for " + kmer + " kmer using " + reads.size() + " reads"); - DeBruijnGraph graph = createGraphFromSequences( reads, kmer, refHaplotype); + DeBruijnGraph graph = createGraphFromSequences(reads, kmer, refHaplotype, activeAlleleHaplotypes); if( graph != null ) { // graphs that fail during creation ( for example, because there are cycles in the reference graph ) will show up here as a null graph object // do a series of steps to clean up the raw assembly graph to make it analysis-ready if ( debugGraphTransformations ) graph.printGraph(new File("unpruned.dot"), pruneFactor); @@ -133,7 +134,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } @Requires({"reads != null", "kmerLength > 0", "refHaplotype != null"}) - protected DeBruijnGraph createGraphFromSequences( final List reads, final int kmerLength, final Haplotype refHaplotype ) { + protected DeBruijnGraph createGraphFromSequences( final List reads, final int kmerLength, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { final DeBruijnGraph graph = new DeBruijnGraph(kmerLength); final DeBruijnGraphBuilder builder = new DeBruijnGraphBuilder(graph); @@ -142,8 +143,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { // something went wrong, so abort right now with a null graph return null; - // now go through the graph already seeded with the reference sequence and add the read kmers to it - if ( ! addReadKmersToGraph(builder, reads) ) + // now go through the graph already seeded with the reference sequence and add the read kmers to it as well as the artificial GGA haplotypes + if ( ! addReadKmersToGraph(builder, reads, activeAlleleHaplotypes) ) // some problem was detected adding the reads to the graph, return null to indicate we failed return null; @@ -156,11 +157,20 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { * * @param builder a debruijn graph builder to add the read kmers to * @param reads a non-null list of reads whose kmers we want to add to the graph + * @param activeAlleleHaplotypes a list of haplotypes to add to the graph for GGA mode * @return true if we successfully added the read kmers to the graph without corrupting it in some way */ - protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads) { + protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads, final List activeAlleleHaplotypes) { final int kmerLength = builder.getKmerSize(); + // First pull kmers out of the artificial GGA haplotypes and throw them on the graph + for( final Haplotype haplotype : activeAlleleHaplotypes ) { + final int end = haplotype.length() - kmerLength; + for( int start = 0; start < end; start++ ) { + builder.addKmerPairFromSeqToGraph( haplotype.getBases(), start, GGA_MODE_ARTIFICIAL_COUNTS ); + } + } + // Next pull kmers out of every read and throw them on the graph for( final GATKSAMRecord read : reads ) { final byte[] sequence = read.getReadBases(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 419ea378f..9bb456230 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -71,7 +71,7 @@ public class GenotypingEngine { private final boolean DEBUG; private final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS; - private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied + private final static List noCall = new ArrayList<>(); // used to noCall all genotypes until the exact model is applied private final VariantAnnotatorEngine annotationEngine; private final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger; @@ -162,8 +162,8 @@ public class GenotypingEngine { final TreeSet startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, haplotypeReadMap, ref, refLoc, activeAllelesToGenotype); // Walk along each position in the key set and create each event to be outputted - final Set calledHaplotypes = new HashSet(); - final List returnCalls = new ArrayList(); + final Set calledHaplotypes = new HashSet<>(); + final List returnCalls = new ArrayList<>(); for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region final List eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype); @@ -183,7 +183,7 @@ public class GenotypingEngine { if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles."); } - final Map mergeMap = new LinkedHashMap(); + final Map mergeMap = new LinkedHashMap<>(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) { mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function @@ -244,7 +244,7 @@ public class GenotypingEngine { if ( in_GGA_mode ) startPosKeySet.clear(); - cleanUpSymbolicUnassembledEvents( haplotypes ); + //cleanUpSymbolicUnassembledEvents( haplotypes ); // We don't make symbolic alleles so this isn't needed currently if ( !in_GGA_mode ) { // run the event merger if we're not in GGA mode final boolean mergedAnything = crossHaplotypeEventMerger.merge(haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc); @@ -267,7 +267,7 @@ public class GenotypingEngine { * @return the list of the sources of vcs in the same order */ private List makePriorityList(final List vcs) { - final List priorityList = new LinkedList(); + final List priorityList = new LinkedList<>(); for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource()); return priorityList; } @@ -276,7 +276,7 @@ public class GenotypingEngine { final int loc, final List activeAllelesToGenotype) { // the overlapping events to merge into a common reference view - final List eventsAtThisLoc = new ArrayList(); + final List eventsAtThisLoc = new ArrayList<>(); if( activeAllelesToGenotype.isEmpty() ) { for( final Haplotype h : haplotypes ) { @@ -292,7 +292,7 @@ public class GenotypingEngine { if( compVC.getStart() == loc ) { int alleleCount = 0; for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - List alleleSet = new ArrayList(2); + List alleleSet = new ArrayList<>(2); alleleSet.add(compVC.getReference()); alleleSet.add(compAltAllele); final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount; @@ -348,7 +348,7 @@ public class GenotypingEngine { final Map> perSampleFilteredReadList, final VariantContext call ) { - final Map returnMap = new LinkedHashMap(); + final Map returnMap = new LinkedHashMap<>(); final GenomeLoc callLoc = parser.createGenomeLoc(call); for( final Map.Entry sample : perSampleReadMap.entrySet() ) { final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); @@ -384,7 +384,7 @@ public class GenotypingEngine { // TODO - split into input haplotypes and output haplotypes as not to share I/O arguments @Requires("haplotypes != null") protected static void cleanUpSymbolicUnassembledEvents( final List haplotypes ) { - final List haplotypesToRemove = new ArrayList(); + final List haplotypesToRemove = new ArrayList<>(); for( final Haplotype h : haplotypes ) { for( final VariantContext vc : h.getEventMap().getVariantContexts() ) { if( vc.isSymbolic() ) { @@ -407,7 +407,7 @@ public class GenotypingEngine { final Map> alleleMapper, final double downsamplingFraction ) { - final Map alleleReadMap = new LinkedHashMap(); + final Map alleleReadMap = new LinkedHashMap<>(); for( final Map.Entry haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap(); for( final Map.Entry> alleleMapperEntry : alleleMapper.entrySet() ) { // for each output allele @@ -430,7 +430,7 @@ public class GenotypingEngine { } protected static Map> createAlleleMapper( final Map mergeMap, final Map> eventMap ) { - final Map> alleleMapper = new LinkedHashMap>(); + final Map> alleleMapper = new LinkedHashMap<>(); for( final Map.Entry entry : mergeMap.entrySet() ) { alleleMapper.put(entry.getValue(), eventMap.get(new Event(entry.getKey()))); } @@ -441,100 +441,33 @@ public class GenotypingEngine { @Ensures({"result.size() == eventsAtThisLoc.size() + 1"}) protected static Map> createEventMapper( final int loc, final List eventsAtThisLoc, final List haplotypes ) { - final Map> eventMapper = new LinkedHashMap>(eventsAtThisLoc.size()+1); - VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis - eventMapper.put(new Event(null), new ArrayList()); + final Map> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1); + final Event refEvent = new Event(null); + eventMapper.put(refEvent, new ArrayList()); for( final VariantContext vc : eventsAtThisLoc ) { eventMapper.put(new Event(vc), new ArrayList()); } - final List undeterminedHaplotypes = new ArrayList(haplotypes.size()); for( final Haplotype h : haplotypes ) { - if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) { - final List alleles = new ArrayList(2); - alleles.add(h.getArtificialRefAllele()); - alleles.add(h.getArtificialAltAllele()); - final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype") - .alleles(alleles) - .loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + h.getArtificialRefAllele().length() - 1).make() ); - if( eventMapper.containsKey(artificialVC) ) { - eventMapper.get(artificialVC).add(h); - } - } else if( h.getEventMap().get(loc) == null ) { // no event at this location so let's investigate later - undeterminedHaplotypes.add(h); + if( h.getEventMap().get(loc) == null ) { + eventMapper.get(refEvent).add(h); } else { - boolean haplotypeIsDetermined = false; for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) { if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) { eventMapper.get(new Event(vcAtThisLoc)).add(h); - haplotypeIsDetermined = true; break; } } - - if( !haplotypeIsDetermined ) - undeterminedHaplotypes.add(h); } } - for( final Haplotype h : undeterminedHaplotypes ) { - Event matchingEvent = new Event(null); - for( final Map.Entry> eventToTest : eventMapper.entrySet() ) { - // don't test against the reference allele - if( eventToTest.getKey().equals(new Event(null)) ) - continue; - - // only try to disambiguate for alleles that have had haplotypes previously assigned above - if( eventToTest.getValue().isEmpty() ) - continue; - - final Haplotype artificialHaplotype = eventToTest.getValue().get(0); - if( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap(), true) ) { - matchingEvent = eventToTest.getKey(); - break; - } - } - - eventMapper.get(matchingEvent).add(h); - } - return eventMapper; } - protected static boolean isSubSetOf(final Map subset, final Map superset, final boolean resolveSupersetToSubset) { - - for ( final Map.Entry fromSubset : subset.entrySet() ) { - final VariantContext fromSuperset = superset.get(fromSubset.getKey()); - if ( fromSuperset == null ) - return false; - - List supersetAlleles = fromSuperset.getAlternateAlleles(); - if ( resolveSupersetToSubset ) - supersetAlleles = resolveAlternateAlleles(fromSubset.getValue().getReference(), fromSuperset.getReference(), supersetAlleles); - - if ( !supersetAlleles.contains(fromSubset.getValue().getAlternateAllele(0)) ) - return false; - } - - return true; - } - - private static List resolveAlternateAlleles(final Allele targetReference, final Allele actualReference, final List currentAlleles) { - if ( targetReference.length() <= actualReference.length() ) - return currentAlleles; - - final List newAlleles = new ArrayList(currentAlleles.size()); - final byte[] extraBases = Arrays.copyOfRange(targetReference.getBases(), actualReference.length(), targetReference.length()); - for ( final Allele a : currentAlleles ) { - newAlleles.add(Allele.extend(a, extraBases)); - } - return newAlleles; - } - @Ensures({"result.size() == haplotypeAllelesForSample.size()"}) protected static List findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final List> alleleMapper, final List haplotypes ) { if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; } - final List eventAllelesForSample = new ArrayList(); + final List eventAllelesForSample = new ArrayList<>(); for( final Allele a : haplotypeAllelesForSample ) { final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a)); for( int iii = 0; iii < alleleMapper.size(); iii++ ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 2ebfbcee9..e0a755c7b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -47,6 +47,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; @@ -433,8 +436,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In private final static int MIN_READ_LENGTH = 10; private List samplesList = new ArrayList(); - private final static double LOG_ONE_HALF = -Math.log10(2.0); - private final static double LOG_ONE_THIRD = -Math.log10(3.0); private final List allelesToGenotype = new ArrayList(); private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file @@ -603,7 +604,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // if we don't have any data, just abort early return new ActivityProfileState(ref.getLocus(), 0.0); - final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied + final List noCall = new ArrayList<>(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); @@ -625,14 +626,14 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } } genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual); - genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); - genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; + genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF ); + genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD; } } genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() ); } - final List alleles = new ArrayList(); + final List alleles = new ArrayList<>(); alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); @@ -746,9 +747,9 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // Create the reference haplotype which is the bases from the reference that make up the active region finalizeActiveRegion(activeRegion); // merge overlapping fragments, clip adapter and low qual tails - final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); final byte[] fullReferenceWithPadding = activeRegion.getActiveRegionReference(referenceReader, REFERENCE_PADDING); final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion); + final Haplotype referenceHaplotype = createReferenceHaplotype(activeRegion, paddedReferenceLoc); final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype ); @@ -760,6 +761,21 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } } + /** + * Helper function to create the reference haplotype out of the active region and a padded loc + * @param activeRegion the active region from which to generate the reference haplotype + * @param paddedReferenceLoc the GenomeLoc which includes padding and shows how big the reference haplotype should be + * @return a non-null haplotype + */ + private Haplotype createReferenceHaplotype(final ActiveRegion activeRegion, final GenomeLoc paddedReferenceLoc) { + final Haplotype refHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); + refHaplotype.setAlignmentStartHapwrtRef(activeRegion.getExtendedLoc().getStart() - paddedReferenceLoc.getStart()); + final Cigar c = new Cigar(); + c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); + refHaplotype.setCigar(c); + return refHaplotype; + } + /** * Trim down the active region to just enough to properly genotype the events among the haplotypes * @@ -791,7 +807,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } // trim down the haplotypes - final Set haplotypeSet = new HashSet(haplotypes.size()); + final Set haplotypeSet = new HashSet<>(haplotypes.size()); for ( final Haplotype h : haplotypes ) { final Haplotype trimmed = h.trim(trimmedActiveRegion.getExtendedLoc()); if ( trimmed != null ) { @@ -802,7 +818,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } // create the final list of trimmed haplotypes - final List trimmedHaplotypes = new ArrayList(haplotypeSet); + final List trimmedHaplotypes = new ArrayList<>(haplotypeSet); // sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM Collections.sort( trimmedHaplotypes, new HaplotypeBaseComparator() ); @@ -816,7 +832,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // trim down the reads and add them to the trimmed active region - final List trimmedReads = new ArrayList(originalActiveRegion.getReads().size()); + final List trimmedReads = new ArrayList<>(originalActiveRegion.getReads().size()); for( final GATKSAMRecord read : originalActiveRegion.getReads() ) { final GATKSAMRecord clippedRead = ReadClipper.hardClipToRegion( read, trimmedActiveRegion.getExtendedLoc().getStart(), trimmedActiveRegion.getExtendedLoc().getStop() ); if( trimmedActiveRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) { @@ -937,7 +953,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } private Map> splitReadsBySample( final Collection reads ) { - final Map> returnMap = new HashMap>(); + final Map> returnMap = new HashMap<>(); for( final String sample : samplesList) { List readList = returnMap.get( sample ); if( readList == null ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index ca1877142..4a1a5993a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -74,7 +74,6 @@ import java.util.*; public class LikelihoodCalculationEngine { private final static Logger logger = Logger.getLogger(LikelihoodCalculationEngine.class); - private static final double LOG_ONE_HALF = -Math.log10(2.0); private final byte constantGCP; private final double log10globalReadMismappingRate; private final boolean DEBUG; @@ -299,7 +298,7 @@ public class LikelihoodCalculationEngine { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) // First term is approximated by Jacobian log with table lookup. haplotypeLikelihood += ReadUtils.getMeanRepresentativeReadCount( entry.getKey() ) * - ( MathUtils.approximateLog10SumLog10(entry.getValue().get(iii_allele), entry.getValue().get(jjj_allele)) + LOG_ONE_HALF ); + ( MathUtils.approximateLog10SumLog10(entry.getValue().get(iii_allele), entry.getValue().get(jjj_allele)) + MathUtils.LOG_ONE_HALF ); } } haplotypeLikelihoodMatrix[iii][jjj] = haplotypeLikelihood; @@ -397,11 +396,11 @@ public class LikelihoodCalculationEngine { if ( haplotypes.size() == 2 ) return haplotypes; // fast path -- we'll always want to use 2 haplotypes // all of the haplotypes that at least one sample called as one of the most likely - final Set selectedHaplotypes = new HashSet(); + final Set selectedHaplotypes = new HashSet<>(); selectedHaplotypes.add(findReferenceHaplotype(haplotypes)); // ref is always one of the selected // our annoying map from allele -> haplotype - final Map allele2Haplotype = new HashMap(); + final Map allele2Haplotype = new HashMap<>(); for ( final Haplotype h : haplotypes ) { h.setScore(h.isReference() ? Double.MAX_VALUE : 0.0); // set all of the scores to 0 (lowest value) for all non-ref haplotypes allele2Haplotype.put(Allele.create(h, h.isReference()), h); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 20b005b40..3a377409c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -111,7 +111,11 @@ public abstract class LocalAssemblyEngine { * @param refHaplotype the reference haplotype * @return a non-null list of reads */ - protected abstract List assemble(List reads, Haplotype refHaplotype); + protected abstract List assemble(List reads, Haplotype refHaplotype, List activeAlleleHaplotypes); + + protected List assemble(List reads, Haplotype refHaplotype) { + return assemble(reads, refHaplotype, Collections.emptyList()); + } /** * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads @@ -128,8 +132,11 @@ public abstract class LocalAssemblyEngine { if( fullReferenceWithPadding.length != refLoc.size() ) { throw new IllegalArgumentException("Reference bases and reference loc must be the same size."); } if( pruneFactor < 0 ) { throw new IllegalArgumentException("Pruning factor cannot be negative"); } + // create the list of artificial haplotypes that should be added to the graph for GGA mode + final List activeAlleleHaplotypes = createActiveAlleleHaplotypes(refHaplotype, activeAllelesToGenotype, activeRegion.getExtendedLoc()); + // create the graphs by calling our subclass assemble method - final List graphs = assemble(activeRegion.getReads(), refHaplotype); + final List graphs = assemble(activeRegion.getReads(), refHaplotype, activeAlleleHaplotypes); // do some QC on the graphs for ( final SeqGraph graph : graphs ) { sanityCheckGraph(graph, refHaplotype); } @@ -138,45 +145,53 @@ public abstract class LocalAssemblyEngine { if ( graphWriter != null ) { printGraphs(graphs); } // find the best paths in the graphs and return them as haplotypes - return findBestPaths( graphs, refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); + return findBestPaths( graphs, refHaplotype, refLoc, activeRegion.getExtendedLoc() ); } - @Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"}) - @Ensures({"result.contains(refHaplotype)"}) - protected List findBestPaths(final List graphs, final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow) { - // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes - final Set returnHaplotypes = new LinkedHashSet(); - refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - refLoc.getStart()); - final Cigar c = new Cigar(); - c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); - refHaplotype.setCigar(c); - returnHaplotypes.add( refHaplotype ); - + /** + * Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype + * @param refHaplotype the reference haplotype + * @param activeAllelesToGenotype the list of alternate alleles in VariantContexts + * @param activeRegionWindow the window containing the reference haplotype + * @return a non-null list of haplotypes + */ + private List createActiveAlleleHaplotypes(final Haplotype refHaplotype, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow) { + final Set returnHaplotypes = new LinkedHashSet<>(); final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef(); - final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength(); - // for GGA mode, add the desired allele into the haplotype for( final VariantContext compVC : activeAllelesToGenotype ) { for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()); - addHaplotypeForGGA( insertedRefHaplotype, refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true ); + if( insertedRefHaplotype != null ) { // can be null if the requested allele can't be inserted into the haplotype + returnHaplotypes.add(insertedRefHaplotype); + } } } + return new ArrayList<>(returnHaplotypes); + } + + @Ensures({"result.contains(refHaplotype)"}) + protected List findBestPaths(final List graphs, final Haplotype refHaplotype, final GenomeLoc refLoc, final GenomeLoc activeRegionWindow) { + // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes + final Set returnHaplotypes = new LinkedHashSet<>(); + returnHaplotypes.add( refHaplotype ); + + final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef(); + for( final SeqGraph graph : graphs ) { final SeqVertex source = graph.getReferenceSourceVertex(); final SeqVertex sink = graph.getReferenceSinkVertex(); if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph); - final KBestPaths pathFinder = new KBestPaths(allowCyclesInKmerGraphToGeneratePaths); + final KBestPaths pathFinder = new KBestPaths<>(allowCyclesInKmerGraphToGeneratePaths); for ( final Path path : pathFinder.getKBestPaths(graph, numBestHaplotypesPerGraph, source, sink) ) { -// logger.info("Found path " + path); Haplotype h = new Haplotype( path.getBases() ); if( !returnHaplotypes.contains(h) ) { final Cigar cigar = path.calculateCigar(refHaplotype.getBases()); if ( cigar == null ) { - // couldn't produce a meaningful alignment of haplotype to reference, fail quitely + // couldn't produce a meaningful alignment of haplotype to reference, fail quietly continue; } else if( cigar.isEmpty() ) { throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + @@ -197,25 +212,6 @@ public abstract class LocalAssemblyEngine { if ( debug ) logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize()); - - // for GGA mode, add the desired allele into the haplotype if it isn't already present - if( !activeAllelesToGenotype.isEmpty() ) { - final Map eventMap = GenotypingEngine.generateVCsFromAlignment( h, refWithPadding, refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place - for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present - final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); - - // This if statement used to additionally have: - // "|| !vcOnHaplotype.hasSameAllelesAs(compVC)" - // but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto - // a haplotype that already contains a 1bp insertion (so practically it is reference but - // falls into the bin for the 1bp deletion because we keep track of the artificial alleles). - if( vcOnHaplotype == null ) { - for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - addHaplotypeForGGA( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ); - } - } - } - } } } } @@ -238,7 +234,7 @@ public abstract class LocalAssemblyEngine { } } - return new ArrayList(returnHaplotypes); + return new ArrayList<>(returnHaplotypes); } /** @@ -256,71 +252,6 @@ public abstract class LocalAssemblyEngine { return false; } - /** - * Take a haplotype which was generated by injecting an allele into a string of bases and run SW against the reference to determine the variants on the haplotype. - * Unfortunately since this haplotype didn't come from the assembly graph you can't straightforwardly use the bubble traversal algorithm to get this information. - * This is a target for future work as we rewrite the HaplotypeCaller to be more bubble-caller based. - * @param haplotype the candidate haplotype - * @param ref the reference bases to align against - * @param haplotypeList the current list of haplotypes - * @param activeRegionStart the start of the active region in the reference byte array - * @param activeRegionStop the stop of the active region in the reference byte array - * @param FORCE_INCLUSION_FOR_GGA_MODE if true will include in the list even if it already exists - * @return true if the candidate haplotype was successfully incorporated into the haplotype list - */ - @Requires({"ref != null", "ref.length >= activeRegionStop - activeRegionStart"}) - private boolean addHaplotypeForGGA( final Haplotype haplotype, final byte[] ref, final Set haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { - if( haplotype == null ) { return false; } - - final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SWParameterSet.STANDARD_NGS ); - haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() ); - - if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 || swConsensus.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments - return false; - } - - haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0, true) ); - - final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate(haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true); - int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true ); - if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) { - hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal - } - byte[] newHaplotypeBases; - // extend partial haplotypes to contain the full active region sequence - if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll(ArrayUtils.addAll(ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), - haplotype.getBases()), - ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop)); - } else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) ); - } else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) ); - } else { - newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop); - } - - final Haplotype h = new Haplotype( newHaplotypeBases ); - final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SWParameterSet.STANDARD_NGS ); - - h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() ); - if ( haplotype.isArtificialHaplotype() ) { - h.setArtificialEvent(haplotype.getArtificialEvent()); - } - if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart || swConsensus2.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments - return false; - } - - h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0, true) ); - - if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) { - haplotypeList.add(h); - return true; - } else { - return false; - } - } - protected SeqGraph cleanupSeqGraph(final SeqGraph seqGraph) { if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor); @@ -372,7 +303,6 @@ public abstract class LocalAssemblyEngine { * Perform general QC on the graph to make sure something hasn't gone wrong during assembly * @param graph the graph to check * @param refHaplotype the reference haplotype - * @param */ private void sanityCheckGraph(final BaseGraph graph, final Haplotype refHaplotype) { sanityCheckReferenceGraph(graph, refHaplotype); @@ -383,7 +313,6 @@ public abstract class LocalAssemblyEngine { * * @param graph the graph to check * @param refHaplotype the reference haplotype - * @param */ private void sanityCheckReferenceGraph(final BaseGraph graph, final Haplotype refHaplotype) { if( graph.getReferenceSourceVertex() == null ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index db0ce0880..3d4d38d8e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -62,6 +62,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { private final static Logger logger = Logger.getLogger(ReadThreadingAssembler.class); private final static int DEFAULT_NUM_PATHS_PER_GRAPH = 128; + private final static int GGA_MODE_ARTIFICIAL_COUNTS = 1000; /** The min and max kmer sizes to try when building the graph. */ private final List kmerSizes; @@ -88,7 +89,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { } @Override - public List assemble( final List reads, final Haplotype refHaplotype) { + public List assemble( final List reads, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { final List graphs = new LinkedList<>(); for ( final int kmerSize : kmerSizes ) { @@ -96,6 +97,12 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // add the reference sequence to the graph rtgraph.addSequence("ref", refHaplotype.getBases(), null, true); + int hapCount = 0; + for( final Haplotype h : activeAlleleHaplotypes ) { + final int[] counts = new int[h.length()]; + Arrays.fill(counts, GGA_MODE_ARTIFICIAL_COUNTS); + rtgraph.addSequence("activeAllele" + hapCount++, h.getBases(), counts, false); + } // Next pull kmers out of every read and throw them on the graph for( final GATKSAMRecord read : reads ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index 6e9223afb..8e879377f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -590,11 +590,9 @@ public class ReadThreadingGraph extends BaseGraph(), 10, new Haplotype(refCycle.getBytes(), true)); - final DeBruijnGraph g2 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(noCycle.getBytes(), true)); + final DeBruijnGraph g1 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(refCycle.getBytes(), true), Collections.emptyList()); + final DeBruijnGraph g2 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(noCycle.getBytes(), true), Collections.emptyList()); Assert.assertTrue(g1 == null, "Reference cycle graph should return null during creation."); Assert.assertTrue(g2 != null, "Reference non-cycle graph should not return null during creation."); @@ -147,7 +147,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { } } - assembler.addReadKmersToGraph(builder, Arrays.asList(read)); + assembler.addReadKmersToGraph(builder, Arrays.asList(read), Collections.emptyList()); Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); for ( final Kmer addedKmer : builder.addedPairs ) { Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 9ef9fea77..3f3b295f8 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "008029ee34e1becd8312e3c4d608033c"); + "38b4596c3910fdde51ea59aa1a8f848f"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "ae8d95ffe77515cc74a55c2afd142826"); + "08147870d73d9749ced8cfc7cdd4714f"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 91e80b45c..5fc0f4f52 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "bb30d0761dc9e2dfd57bfe07b72d06d8"); + "ffd69c410dca0d2f9fe75f3cb5d08179"); } @Test diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java index a517e1cb1..74361de1b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java @@ -47,6 +47,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; @@ -216,6 +219,10 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { private List assemble(final Assembler assembler, final byte[] refBases, final GenomeLoc loc, final List reads) { final Haplotype refHaplotype = new Haplotype(refBases, true); + final Cigar c = new Cigar(); + c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); + refHaplotype.setCigar(c); + final ActiveRegion activeRegion = new ActiveRegion(loc, null, true, genomeLocParser, 0); activeRegion.addAll(reads); final LocalAssemblyEngine engine = createAssembler(assembler); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java index 8efb3d486..3f10fc72c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java @@ -85,7 +85,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest { public SeqGraph assemble() { assembler.removePathsNotConnectedToRef = false; // need to pass some of the tests assembler.setDebugGraphTransformations(true); - final SeqGraph graph = assembler.assemble(reads, refHaplotype).get(0); + final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.emptyList()).get(0); if ( DEBUG ) graph.printGraph(new File("test.dot"), 0); return graph; } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 49157a206..b158d1509 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -55,17 +55,19 @@ public class MathUtils { private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP; private static final double MAX_JACOBIAN_TOLERANCE = 8.0; private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1; - private static final int MAXN = 70000; + private static final int MAXN = 70_000; private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients /** * The smallest log10 value we'll emit from normalizeFromLog10 and other functions * where the real-space value is 0.0. */ - public final static double LOG10_P_OF_ZERO = -1000000.0; - public final static double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5); - private final static double NATURAL_LOG_OF_TEN = Math.log(10.0); - private final static double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI); + public static final double LOG10_P_OF_ZERO = -1000000.0; + public static final double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5); + public static final double LOG_ONE_HALF = -Math.log10(2.0); + public static final double LOG_ONE_THIRD = -Math.log10(3.0); + private static final double NATURAL_LOG_OF_TEN = Math.log(10.0); + private static final double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI); static { log10Cache = new double[LOG10_CACHE_SIZE]; diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index f253fc9c9..b309ef633 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -221,7 +221,7 @@ public class PerReadAlleleLikelihoodMap { final int count = ReadUtils.getMeanRepresentativeReadCount(read); final double likelihood_iii = entry.getValue().get(iii_allele); final double likelihood_jjj = entry.getValue().get(jjj_allele); - haplotypeLikelihood += count * (MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + LOG_ONE_HALF); + haplotypeLikelihood += count * (MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + MathUtils.LOG_ONE_HALF); // fast exit. If this diploid pair is already worse than the max, just stop and look at the next pair if ( haplotypeLikelihood < maxElement ) break; @@ -241,7 +241,6 @@ public class PerReadAlleleLikelihoodMap { return new MostLikelyAllele(alleles.get(hap1), alleles.get(hap2), maxElement, maxElement); } - private static final double LOG_ONE_HALF = -Math.log10(2.0); /** * Given a map from alleles to likelihoods, find the allele with the largest likelihood. diff --git a/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java index bacee7942..1f932b222 100644 --- a/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java @@ -46,7 +46,6 @@ public class Haplotype extends Allele { private EventMap eventMap = null; private Cigar cigar; private int alignmentStartHapwrtRef; - private Event artificialEvent = null; private double score = 0; /** @@ -93,11 +92,6 @@ public class Haplotype extends Allele { super(allele, true); } - protected Haplotype( final byte[] bases, final Event artificialEvent ) { - this(bases, false); - this.artificialEvent = artificialEvent; - } - public Haplotype( final byte[] bases, final GenomeLoc loc ) { this(bases, false); this.genomeLocation = loc; @@ -189,7 +183,7 @@ public class Haplotype extends Allele { } /** - * Get the cigar for this haplotype. Note that cigar is guarenteed to be consolidated + * Get the cigar for this haplotype. Note that the cigar is guaranteed to be consolidated * in that multiple adjacent equal operates will have been merged * @return the cigar of this haplotype */ @@ -223,30 +217,6 @@ public class Haplotype extends Allele { throw new IllegalArgumentException("Read length " + length() + " not equal to the read length of the cigar " + cigar.getReadLength()); } - public boolean isArtificialHaplotype() { - return artificialEvent != null; - } - - public Event getArtificialEvent() { - return artificialEvent; - } - - public Allele getArtificialRefAllele() { - return artificialEvent.ref; - } - - public Allele getArtificialAltAllele() { - return artificialEvent.alt; - } - - public int getArtificialAllelePosition() { - return artificialEvent.pos; - } - - public void setArtificialEvent( final Event artificialEvent ) { - this.artificialEvent = artificialEvent; - } - @Requires({"refInsertLocation >= 0"}) public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) { // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates @@ -260,7 +230,7 @@ public class Haplotype extends Allele { newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, 0, haplotypeInsertLocation)); // bases before the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, haplotypeInsertLocation + refAllele.length(), myBases.length)); // bases after the variant - return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation)); + return new Haplotype(newHaplotypeBases); } public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList,