diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java index 7ba81ca8c..ace4f22d2 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java @@ -119,9 +119,12 @@ public class GraphBasedLikelihoodCalculationEngine implements ReadLikelihoodCalc hmm,log10GlobalReadMismappingRate,heterogeneousKmerSizeResolution); final List haplotypes = assemblyResultSet.getHaplotypeList(); final List supportedHaplotypes = graphLikelihoodEngine.getHaplotypeList(); - if (supportedHaplotypes.size() != haplotypes.size()) + final ReadLikelihoods result = graphLikelihoodEngine.computeReadLikelihoods(supportedHaplotypes, samples, perSampleReadList); + if (supportedHaplotypes.size() != haplotypes.size()) { logger.warn("Some haplotypes were drop due to missing route on the graph (supported / all): " + supportedHaplotypes.size() + "/" + haplotypes.size()); - final ReadLikelihoods result = graphLikelihoodEngine.computeReadLikelihoods(supportedHaplotypes,samples,perSampleReadList); + result.addMissingAlleles(haplotypes,Double.NEGATIVE_INFINITY); + } + if (debugMode != DebugMode.NONE) graphLikelihoodDebugDumps(assemblyResultSet.getRegionForGenotyping(), graphLikelihoodEngine,result); return result; } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 32f54436c..a1bf09043 100644 --- 
a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -338,7 +338,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine= eventsAtThisLoc.size() + 1"}) @Ensures({"result.size() == eventsAtThisLoc.size() + 1"}) - protected static Map> createEventMapper( final int loc, final List eventsAtThisLoc, final List haplotypes ) { + protected static Map> createEventMapper( final int loc, final List eventsAtThisLoc, final List haplotypes) { final Map> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1); final Event refEvent = new Event(null); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java index e662806fe..4feed71d9 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java @@ -275,13 +275,24 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula // Modify the read qualities by applying the PCR error model and capping the minimum base,insertion,deletion qualities final List processedReads = modifyReadQualities(likelihoods.reads()); + final Map gapContinuationPenalties = buildGapContinuationPenalties(processedReads,constantGCP); // Run the PairHMM to calculate the log10 likelihood of each (processed) reads' arising from each haplotype - pairHMMThreadLocal.get().computeLikelihoods(likelihoods,processedReads,constantGCP); + 
pairHMMThreadLocal.get().computeLikelihoods(likelihoods,processedReads,gapContinuationPenalties); if (WRITE_LIKELIHOODS_TO_FILE) writeDebugLikelihoods(likelihoods); } + private Map buildGapContinuationPenalties(final List processedReads, final byte gcp) { + final Map result = new HashMap<>(processedReads.size()); + for (final GATKSAMRecord read : processedReads) { + final byte[] readGcpArray = new byte[read.getReadLength()]; + Arrays.fill(readGcpArray,gcp); + result.put(read,readGcpArray); + } + return result; + } + private void writeDebugLikelihoods(final ReadLikelihoods.Matrix likelihoods) { final List reads = likelihoods.reads(); final List haplotypes = likelihoods.alleles(); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java index 32e1dcd0e..a2422c544 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java @@ -47,11 +47,13 @@ package org.broadinstitute.gatk.tools.walkers.indels; import com.google.java.contract.Ensures; +import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.clipping.ReadClipper; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.pairhmm.ArrayLoglessPairHMM; import org.broadinstitute.gatk.utils.pairhmm.Log10PairHMM; @@ -61,12 +63,8 @@ import 
org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.ReadUtils; -import htsjdk.variant.variantcontext.Allele; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.Map; +import java.util.*; public class PairHMMIndelErrorModel { @@ -297,10 +295,8 @@ public class PairHMMIndelErrorModel { final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) { final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()]; - final LinkedList readList = new LinkedList<>(); - final Map readGCPArrayMap = new LinkedHashMap<>(); int readIdx=0; - for (PileupElement p: pileup) { + for (final PileupElement p: pileup) { // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) if (perReadAlleleLikelihoodMap.containsPileupElement(p)) { @@ -439,28 +435,31 @@ public class PairHMMIndelErrorModel { // Create a new read based on the current one, but with trimmed bases/quals, for use in the HMM final GATKSAMRecord processedRead = GATKSAMRecord.createQualityModifiedRead(read, readBases, readQuals, baseInsertionQualities, baseDeletionQualities); - readList.add(processedRead); // Pack the shortened read and its associated gap-continuation-penalty array into a map, as required by PairHMM - readGCPArrayMap.put(processedRead,contextLogGapContinuationProbabilities); + final Map readGCPArrayMap = Collections.singletonMap(processedRead,contextLogGapContinuationProbabilities); // Create a map of alleles to a new set of haplotypes, whose bases have been trimmed to the appropriate genomic locations final Map trimmedHaplotypeMap = trimHaplotypes(haplotypeMap, startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, ref); + // Apparently more than one allele can map to the same haplotype after trimming + 
final Set distinctHaplotypesSet = new LinkedHashSet<>(trimmedHaplotypeMap.values()); + final List distinctHaplotypesList = Arrays.asList(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()])); // Get the likelihoods for our clipped read against each of our trimmed haplotypes. - final PerReadAlleleLikelihoodMap singleReadRawLikelihoods = pairHMM.computeLikelihoods(readList, trimmedHaplotypeMap, readGCPArrayMap); + final ReadLikelihoods rl = new ReadLikelihoods<>( + + Collections.singletonList("DUMMY_SAMPLE"),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead))); + final ReadLikelihoods.Matrix dummySampleLikelihoods = rl.sampleMatrix(0); + pairHMM.computeLikelihoods(rl.sampleMatrix(0), Collections.singletonList(processedRead), readGCPArrayMap); // Pack the original pilup element, each allele, and each associated log10 likelihood into a final map, and add each likelihood to the array - for (Allele a: trimmedHaplotypeMap.keySet()){ - double readLikelihood = singleReadRawLikelihoods.getLikelihoodAssociatedWithReadAndAllele(processedRead, a); - perReadAlleleLikelihoodMap.add(p, a, readLikelihood); + for (final Allele a: trimmedHaplotypeMap.keySet()){ + final Haplotype h = trimmedHaplotypeMap.get(a); + final int hIndex = rl.alleleIndex(h); + final double readLikelihood = dummySampleLikelihoods.get(hIndex,0); readLikelihoods[readIdx][j++] = readLikelihood; + perReadAlleleLikelihoodMap.add(read,a,readLikelihood); } - // The readList for sending to the HMM should only ever contain 1 read, as each must be clipped individually - readList.remove(processedRead); - - // The same is true for the read/GCP-array map - readGCPArrayMap.remove(processedRead); } } readIdx++; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/CnyPairHMM.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/CnyPairHMM.java index 80be36f02..f506b7ba2 
100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/CnyPairHMM.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/CnyPairHMM.java @@ -46,10 +46,10 @@ package org.broadinstitute.gatk.utils.pairhmm; -import org.broadinstitute.gatk.utils.haplotype.Haplotype; -import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import htsjdk.variant.variantcontext.Allele; +import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; +import org.broadinstitute.gatk.utils.haplotype.Haplotype; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import java.io.File; import java.lang.reflect.Field; @@ -192,47 +192,42 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { * {@inheritDoc} */ @Override - public PerReadAlleleLikelihoodMap computeLikelihoods(final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap){ + public void computeLikelihoods(final ReadLikelihoods.Matrix likelihoods, final List processedReads, final Map gcp){ // initialize the pairHMM if necessary if (! 
initialized) { - int readMaxLength = findMaxReadLength(reads); - int haplotypeMaxLength = findMaxHaplotypeLength(alleleHaplotypeMap); + int readMaxLength = findMaxReadLength(processedReads); + int haplotypeMaxLength = findMaxHaplotypeLength(likelihoods.alleles()); initialize(readMaxLength, haplotypeMaxLength); } // Pass the read bases/quals, and the haplotypes as a list into the HMM - performBatchAdditions(reads, alleleHaplotypeMap, GCPArrayMap); + performBatchAdditions(processedReads, likelihoods.alleles(), gcp); - // Get the log10-likelihoods for each read/haplotype ant pack into the results map - final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); - collectLikelihoodResults(reads, alleleHaplotypeMap, likelihoodMap); - - return likelihoodMap; + // Get the log10-likelihoods for each read/haplotype ant pack into the read-likelihoods matrix. + collectLikelihoodResults(likelihoods); } - private void collectLikelihoodResults(List reads, Map alleleHaplotypeMap, PerReadAlleleLikelihoodMap likelihoodMap) { - for(final GATKSAMRecord read : reads){ - final double[] likelihoods = batchGetResult(); - int jjj = 0; - for (Allele allele : alleleHaplotypeMap.keySet()){ - final double log10l = likelihoods[jjj]; - likelihoodMap.add(read, allele, log10l); - jjj++; - } + private void collectLikelihoodResults(ReadLikelihoods.Matrix likelihoods) { + final int numHaplotypes = likelihoods.alleleCount(); + final int numReads = likelihoods.readCount(); + for(int r = 0; r < numReads; r++) { + final double[] likelihoodValues = batchGetResult(); + + for (int h = 0; h < numHaplotypes; h++) + likelihoods.set(h,r, likelihoodValues[h]); } } - private void performBatchAdditions(List reads, Map alleleHaplotypeMap, Map GCPArrayMap) { - final List haplotypeList = getHaplotypeList(alleleHaplotypeMap); + private void performBatchAdditions(final List reads, final List haplotypes, Map gcp) { for(final GATKSAMRecord read : reads){ final byte[] readBases = 
read.getReadBases(); final byte[] readQuals = read.getBaseQualities(); final byte[] readInsQuals = read.getBaseInsertionQualities(); final byte[] readDelQuals = read.getBaseDeletionQualities(); - final byte[] overallGCP = GCPArrayMap.get(read); + final byte[] overallGCP = gcp.get(read); - batchAdd(haplotypeList, readBases, readQuals, readInsQuals, readDelQuals, overallGCP); + batchAdd(haplotypes, readBases, readQuals, readInsQuals, readDelQuals, overallGCP); } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java index 89c94a3e2..5be872f0e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java @@ -49,23 +49,21 @@ package org.broadinstitute.gatk.utils.pairhmm; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.gatk.utils.QualityUtils; - +import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import htsjdk.variant.variantcontext.Allele; -import org.broadinstitute.gatk.utils.exceptions.UserException; -import static org.broadinstitute.gatk.utils.pairhmm.PairHMMModel.*; -import java.util.List; -import java.util.Map; -import java.util.HashMap; +import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; -import java.io.BufferedWriter; -import java.util.Map; -import java.util.HashMap; import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import 
java.util.Map; + +import static org.broadinstitute.gatk.utils.pairhmm.PairHMMModel.*; /** @@ -156,10 +154,10 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM { * {@inheritDoc} */ @Override - public PerReadAlleleLikelihoodMap computeLikelihoods( final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap ) { + public void computeLikelihoods( final ReadLikelihoods.Matrix likelihoods, final List processedReads, final Map GCPArrayMap ) { // (re)initialize the pairHMM only if necessary - final int readMaxLength = verify ? findMaxReadLength(reads) : 0; - final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0; + final int readMaxLength = verify ? findMaxReadLength(processedReads) : 0; + final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(likelihoods.alleles()) : 0; if(verify) { if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) @@ -167,15 +165,15 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM { if ( ! 
initialized ) throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode"); } - int readListSize = reads.size(); - int numHaplotypes = alleleHaplotypeMap.size(); + int readListSize = processedReads.size(); + int numHaplotypes = likelihoods.alleleCount(); int numTestcases = readListSize*numHaplotypes; if(debug0_1) System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); int idx = 0; - for(GATKSAMRecord read : reads) + for(final GATKSAMRecord read : processedReads) { - byte [] overallGCP = GCPArrayMap.get(read); + final byte [] overallGCP = GCPArrayMap.get(read); if(debug0_1) System.out.println("Java read length "+read.getReadBases().length); if(debug3) @@ -195,9 +193,9 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM { if(verify) { idx = 0; - for (Map.Entry currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always + for (final Haplotype h : likelihoods.alleles()) //order is important - access in same order always { - byte[] haplotypeBases = currEntry.getValue().getBases(); + byte[] haplotypeBases = h.getBases(); if(debug0_1) System.out.println("Java haplotype length "+haplotypeBases.length); if(debug3) @@ -212,10 +210,10 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM { PerReadAlleleLikelihoodMap likelihoodMap = null; if(verify) { - jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap); + jniPairHMM.computeLikelihoods(likelihoods, processedReads, GCPArrayMap); likelihoodArray = jniPairHMM.getLikelihoodArray(); //to compare values - likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap); + super.computeLikelihoods(likelihoods, processedReads, GCPArrayMap); } else { @@ -234,13 +232,13 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM { idx = 0; int idxInsideHaplotypeList = 0; int readIdx = 0; - for(GATKSAMRecord read : reads) + for(final GATKSAMRecord read : processedReads) { 
for(int j=0;j currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always + for (final Haplotype haplotype : likelihoods.alleles())//order is important - access in same order always { - idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); + idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(haplotype); likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList]; ++idx; } @@ -271,13 +269,13 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM { idx = 0; System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); boolean firstLine = true; - for(GATKSAMRecord read : reads) + for(final GATKSAMRecord read : processedReads) { byte [] overallGCP = GCPArrayMap.get(read); byte[] tmpByteArray = new byte[read.getReadBases().length]; - for (Map.Entry currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always + for (final Haplotype haplotype : likelihoods.alleles()) //order is important - access in same order always { - byte[] haplotypeBases = currEntry.getValue().getBases(); + byte[] haplotypeBases = haplotype.getBases(); debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true); debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true); for(int k=0;k haplotypeToHaplotypeListIdxMap = new HashMap(); + private HashMap haplotypeToHaplotypeListIdxMap = new HashMap<>(); private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray; @Override public HashMap getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; } @@ -204,22 +194,23 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { * {@inheritDoc} */ @Override - public PerReadAlleleLikelihoodMap computeLikelihoods( final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap ) { + public void computeLikelihoods( final ReadLikelihoods.Matrix likelihoods, final List processedReads, final Map gcp ) { + if (processedReads.isEmpty()) + 
return; if(doProfiling) startTime = System.nanoTime(); - int readListSize = reads.size(); - int numHaplotypes = alleleHaplotypeMap.size(); - int numTestcases = readListSize*numHaplotypes; + int readListSize = processedReads.size(); + int numHaplotypes = likelihoods.alleleCount(); JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize]; int idx = 0; - for(GATKSAMRecord read : reads) + for(GATKSAMRecord read : processedReads) { readDataArray[idx] = new JNIReadDataHolderClass(); readDataArray[idx].readBases = read.getReadBases(); readDataArray[idx].readQuals = read.getBaseQualities(); readDataArray[idx].insertionGOP = read.getBaseInsertionQualities(); readDataArray[idx].deletionGOP = read.getBaseDeletionQualities(); - readDataArray[idx].overallGCP = GCPArrayMap.get(read); + readDataArray[idx].overallGCP = gcp.get(read); ++idx; } @@ -230,20 +221,18 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { // for(haplotypes) // compute_full_prob() jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12); - - final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); - idx = 0; - int idxInsideHaplotypeList = 0; + int readIdx = 0; - for(GATKSAMRecord read : reads) + for(int r = 0; r < readListSize; r++) { - for (Map.Entry currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always - { + int hapIdx = 0; + for (final Haplotype haplotype : likelihoods.alleles()) { + //Since the order of haplotypes in the List and alleleHaplotypeMap is different, //get idx of current haplotype in the list and use this idx to get the right likelihoodValue - idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); - likelihoodMap.add(read, currEntry.getKey(), mLikelihoodArray[readIdx + idxInsideHaplotypeList]); - ++idx; + final int idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(haplotype); + 
likelihoods.set(hapIdx,r,mLikelihoodArray[readIdx + idxInsideHaplotypeList]); + ++hapIdx; } readIdx += numHaplotypes; } @@ -256,7 +245,6 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { pairHMMSetupTime += threadLocalSetupTimeDiff; } } - return likelihoodMap; } /** diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 7ac0c86df..3d6103966 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "ae90d6be28c19e455083a47bc95c3b1b"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "a9730f883a2e01e93d56b11321c9fd52"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 6d95098fe..98a1586f9 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ 
b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9fc4f04105bde31bafc93548745cb67e"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","93cf7ed2645599d0ddd319c6989eadcc"); } @Test(enabled = true) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index e8971f820..286096111 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("8a4de9e1f59cffe80a4372cf02fe809e")); + Arrays.asList("24d5f65860507ec7f8d9441bfee8653a")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("2b92df91a9337b9d9f03db5699bb41f2")); + Arrays.asList("188dc4d70dc5f4de09f645270cc3f4e1")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { 
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("591c9a7bd7d64974b4a2dd932fdef138")); + Arrays.asList("31aaad9adbf12bb7fb12d53080e5c2f5")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("e784b6d1b3725f72a2c42b19e8d24cb1")); + Arrays.asList("fecb8ae4e8f5ea2b2bf3826d674298f7")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("d101beb3d1f71a14a31438cce9786881")); + Arrays.asList("0662ec6fe6d6e9807f758b1c82adf056")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("96c2ab043f523c7676e4bc36dd3c9d0b")); + Arrays.asList("3d5140ffc02a125ee26c63ec55449192")); executeTest("test 
minIndelFraction 0.0", spec); } @@ -184,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("53ce3aa9ee16302ab2ec21f742b5e157")); + Arrays.asList("7c62eb9142d9124e49c123dcc052ce57")); executeTest("test minIndelFraction 0.25", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 2c61d097e..8398b1776 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -53,18 +53,19 @@ import java.util.Arrays; public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ - private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; // -------------------------------------------------------------------------------------------------------------- // // testing normal calling // - // -------------------------------------------------------------------------------------------------------------- + // ---------------------------------------------------- 
---------------------------------------------------------- @Test public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("058b54cff14527485c7b1ab035ebb6c4")); + Arrays.asList("c6462121e5a419546f50d53646274fd3")); executeTest("test MultiSample Pilot1", spec); } @@ -96,7 +97,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("120270ddf48dfd461f756c89ea1ab074")); + Arrays.asList("85e8ba3cfeef5e3b72418bcfaec11668")); executeTest("test Multiple SNP alleles", spec); } @@ -112,7 +113,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("bc5a143868e3ad3acc9bb7c09798cdf2")); + Arrays.asList("bd867fb341e75a9f1fc78d25bda93f63")); executeTest("test reverse trim", spec); } @@ -120,7 +121,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header 
-glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("02fd6357bd1082115822c8a931cbb6a3")); + Arrays.asList("e87419cd42fb9684ee3674ee54abe567")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 2e8fc67db..573e61df7 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "7096aa0ba002757cdcb9e1868d51a85f"); + "1a65d432437670d2816a42ea270c06a1"); } private void HCTestComplexConsensusMode(String bam, String args, String md5) { @@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleConsensusModeComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", - "45be554608485d8d3979284277d00f95"); + "a5bb8f7eb1f7db997be9fd8928a788f6"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java 
b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 033fff46a..86aa7ca21 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "07c5df4cbd91170d0061355c1a8f7fc9"); + "4047ea70f37b59b782402f678ea2dfee"); } @Test diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java index c339a2269..61a6860fd 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java @@ -388,6 +388,55 @@ public class ReadLikelihoodsUnitTest } + @Test(dataProvider = "dataSets") + public void testAddMissingAlleles(final String[] samples, final Allele[] alleles, final Map> reads) { + final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads); + final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original); + final ReadLikelihoods result = original.clone(); + + // If all the alleles passed are present in the read-likelihoods collection there is no change. 
+ result.addMissingAlleles(result.alleles(),Double.NEGATIVE_INFINITY); + testLikelihoodMatrixQueries(samples,result,originalLikelihoods); + + // If the allele list passed is empty there is no effect. + result.addMissingAlleles(Collections.EMPTY_LIST,Double.NEGATIVE_INFINITY); + testLikelihoodMatrixQueries(samples,result,originalLikelihoods); + + final Allele newOne; + final Allele newTwo; + final Allele newThree; + + // We add a single missing allele. + result.addMissingAlleles(Arrays.asList(newOne = Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-12345.6); + Assert.assertEquals(original.alleleCount() + 1, result.alleleCount()); + + // We add two more amongst existing alleles: + result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.allele(1), + result.allele(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321); + + Assert.assertEquals(original.alleleCount()+3,result.alleleCount()); + + final List expectedAlleles = new ArrayList<>(original.alleles()); + expectedAlleles.add(newOne); expectedAlleles.add(newTwo); expectedAlleles.add(newThree); + + Assert.assertEquals(result.alleles(),expectedAlleles); + + final double[][][] newLikelihoods = new double[originalLikelihoods.length][][]; + for (int s = 0; s < samples.length; s++) { + newLikelihoods[s] = Arrays.copyOf(originalLikelihoods[s],originalLikelihoods[s].length + 3); + final int sampleReadCount = original.sampleReadCount(s); + final int originalAlleleCount = originalLikelihoods[s].length; + newLikelihoods[s][originalAlleleCount] = new double[sampleReadCount]; + Arrays.fill(newLikelihoods[s][originalAlleleCount],-12345.6); + newLikelihoods[s][originalAlleleCount+1] = new double[sampleReadCount]; + Arrays.fill(newLikelihoods[s][originalAlleleCount+1],-6.54321); + newLikelihoods[s][originalAlleleCount+2] = new double[sampleReadCount]; + 
Arrays.fill(newLikelihoods[s][originalAlleleCount+2],-6.54321); + } + testLikelihoodMatrixQueries(samples,result,newLikelihoods); + } + + @Test(dataProvider = "dataSets") public void testAddNonRefAllele(final String[] samples, final Allele[] alleles, final Map> reads) { final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads); @@ -480,7 +529,8 @@ public class ReadLikelihoodsUnitTest {Allele.create("A",true), Allele.create("T"), Allele.create("C")}, {Allele.create("A",true)}, {Allele.create("ATTTA"), Allele.create("A",true)}, - {Allele.create("A"), Allele.create("AT",true)} + {Allele.create("A"), Allele.create("AT",true)}, + {Allele.create("A",false), Allele.create("AT",false)}, }; @DataProvider(name="marginalizationDataSets") diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java index f03dfbe78..bf12d3a33 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java @@ -95,7 +95,7 @@ public class ReadLikelihoods implements Cloneable { /** * Index of the reference allele if any, otherwise -1. */ - private final int referenceAlleleIndex; + private int referenceAlleleIndex; /** * Caches the read-list per sample list returned by {@link #sampleReads} @@ -568,12 +568,66 @@ public class ReadLikelihoods implements Cloneable { final GATKSAMRecord replacement = readRealignments.get(read); if (replacement == null) continue; - readIndex.put(sampleReads[r] = replacement,r); readIndex.remove(read); + readIndex.put(sampleReads[r] = replacement,r); } } } + /** + * Add alleles that are missing in the read-likelihoods collection giving all reads a default + * likelihood value. 
+ * @param alleles the potentially missing alleles. + * @param defaultLikelihood the default read likelihood value for that allele. + * + * @throws IllegalArgumentException if {@code alleles} is {@code null} or there is more than + * one missing allele that is a reference or there is one but the collection already has + * a reference allele. + */ + public void addMissingAlleles(final Collection alleles, final double defaultLikelihood) { + if (alleles == null) + throw new IllegalArgumentException("the alleles list cannot be null"); + if (alleles.isEmpty()) + return; + final List allelesToAdd = new ArrayList(alleles.size()); + for (final A allele : alleles) + if (!alleleIndex.containsKey(allele)) + allelesToAdd.add(allele); + if (allelesToAdd.isEmpty()) + return; + + final int oldAlleleCount = this.alleles.length; + final int newAlleleCount = this.alleles.length + allelesToAdd.size(); + + alleleList = null; + int referenceIndex = this.referenceAlleleIndex; + this.alleles = Arrays.copyOf(this.alleles,newAlleleCount); + int nextIndex = oldAlleleCount; + for (final A allele : allelesToAdd) { + if (allele.isReference()) { + if (referenceIndex != -1) + throw new IllegalArgumentException("there cannot be more than one reference allele"); + referenceIndex = nextIndex; + } + alleleIndex.put(this.alleles[nextIndex] = allele, nextIndex++); + } + + if (referenceIndex != -1) + referenceAlleleIndex = referenceIndex; + + final int sampleCount = samples.length; + for (int s = 0; s < sampleCount; s++) { + final int sampleReadCount = readsBySampleIndex[s].length; + final double[][] newValuesBySampleIndex = Arrays.copyOf(valuesBySampleIndex[s],newAlleleCount); + for (int a = oldAlleleCount; a < newAlleleCount; a++) { + newValuesBySampleIndex[a] = new double[sampleReadCount]; + if (defaultLikelihood != 0.0) + Arrays.fill(newValuesBySampleIndex[a],defaultLikelihood); + } + valuesBySampleIndex[s] = newValuesBySampleIndex; + } + } + /** * Likelihood matrix between a set of alleles and 
reads. * @param the allele-type. @@ -740,7 +794,7 @@ public class ReadLikelihoods implements Cloneable { * or its values contain reference to non-existing alleles in this read-likelihood collection. Also no new allele * can have zero old alleles mapping nor two new alleles can make reference to the same old allele. */ - public ReadLikelihoods marginalize(final Map> newToOldAlleleMap,final GenomeLoc overlap) { + public ReadLikelihoods marginalize(final Map> newToOldAlleleMap, final GenomeLoc overlap) { if (overlap == null) return marginalize(newToOldAlleleMap); @@ -775,7 +829,7 @@ public class ReadLikelihoods implements Cloneable { final int newSampleReadCount = sampleReadsToKeep.length; if (newSampleReadCount == oldSampleReadCount) { newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(readIndexBySampleIndex[s]); - newReadsBySampleIndex[s] = readsBySampleIndex[s].clone(); + newReadsBySampleIndex[s] = oldSampleReads.clone(); } else { final Object2IntMap newSampleReadIndex = newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(newSampleReadCount); final GATKSAMRecord[] newSampleReads = newReadsBySampleIndex[s] = new GATKSAMRecord[newSampleReadCount]; diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/pairhmm/PairHMM.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/pairhmm/PairHMM.java index eb0d408e3..6c4460cb3 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/pairhmm/PairHMM.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/pairhmm/PairHMM.java @@ -26,15 +26,15 @@ package org.broadinstitute.gatk.utils.pairhmm; import com.google.java.contract.Requires; +import htsjdk.variant.variantcontext.Allele; import org.apache.log4j.Logger; import org.broadinstitute.gatk.utils.MathUtils; -import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; import 
org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import htsjdk.variant.variantcontext.Allele; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; /** @@ -158,10 +158,9 @@ public abstract class PairHMM { return listMaxReadLength; } - protected int findMaxHaplotypeLength(final Map haplotypeMap) { + protected int findMaxHaplotypeLength(final Collection haplotypes) { int listMaxHaplotypeLength = 0; - for( final Allele a: haplotypeMap.keySet() ) { - final Haplotype h = haplotypeMap.get(a); + for( final Haplotype h : haplotypes) { final int haplotypeLength = h.getBases().length; if( haplotypeLength > listMaxHaplotypeLength ) { listMaxHaplotypeLength = haplotypeLength; } } @@ -172,14 +171,20 @@ * Given a list of reads and haplotypes, for every read compute the total probability of said read arising from * each haplotype given base substitution, insertion, and deletion probabilities. * - * @param processedReads reads to analyze + * @param processedReads reads to analyze instead of the ones present in the destination read-likelihoods. * @param likelihoods where to store the likelihoods where position [a][r] is reserved for the likelihood of {@code reads[r]} * conditional to {@code alleles[a]}. - * @param constantGCP constant penalty for gap continuations. + * @param gcp map from each processed read to its per-base gap continuation penalty array. + * + * @throws IllegalArgumentException if the input arguments are invalid — TODO: document the exact conditions. * * @return never {@code null}. 
*/ - public void computeLikelihoods(final ReadLikelihoods.Matrix likelihoods, final List processedReads, final byte constantGCP) { + public void computeLikelihoods(final ReadLikelihoods.Matrix likelihoods, + final List processedReads, + final Map gcp) { + if (processedReads.isEmpty()) + return; if(doProfiling) startTime = System.nanoTime(); // (re)initialize the pairHMM only if necessary @@ -195,13 +200,11 @@ public abstract class PairHMM { int idx = 0; int readIndex = 0; for(final GATKSAMRecord read : processedReads){ - final int readLength = read.getReadLength(); final byte[] readBases = read.getReadBases(); final byte[] readQuals = read.getBaseQualities(); final byte[] readInsQuals = read.getBaseInsertionQualities(); final byte[] readDelQuals = read.getBaseDeletionQualities(); - final byte[] overallGCP = new byte[readLength]; - Arrays.fill(overallGCP,constantGCP); + final byte[] overallGCP = gcp.get(read); // peak at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation) final boolean isFirstHaplotype = true; @@ -225,78 +228,6 @@ public abstract class PairHMM { } } - /** - * Given a list of reads and haplotypes, for every read compute the total probability of said read arising from - * each haplotype given base substitution, insertion, and deletion probabilities. - * - * @param reads the list of reads - * @param alleleHaplotypeMap the list of haplotypes - * @param GCPArrayMap Each read is associated with an array containing the gap continuation penalties for use in the model. Length of each GCP-array must match that of its read. 
- * @return a PerReadAlleleLikelihoodMap containing each read, haplotype-allele, and the log10 probability of - * said read coming from the said haplotype under the provided error model - * @deprecated - */ - public PerReadAlleleLikelihoodMap computeLikelihoods(final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap) { - if(doProfiling) - startTime = System.nanoTime(); - - // (re)initialize the pairHMM only if necessary - final int readMaxLength = findMaxReadLength(reads); - final int haplotypeMaxLength = findMaxHaplotypeLength(alleleHaplotypeMap); - if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) { initialize(readMaxLength, haplotypeMaxLength); } - - final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); - mLikelihoodArray = new double[reads.size()*alleleHaplotypeMap.size()]; - int idx = 0; - for(GATKSAMRecord read : reads){ - final byte[] readBases = read.getReadBases(); - final byte[] readQuals = read.getBaseQualities(); - final byte[] readInsQuals = read.getBaseInsertionQualities(); - final byte[] readDelQuals = read.getBaseDeletionQualities(); - final byte[] overallGCP = GCPArrayMap.get(read); - - // peak at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation) - byte[] currentHaplotypeBases = null; - final boolean isFirstHaplotype = true; - Allele currentAllele = null; - double log10l; - //for (final Allele allele : alleleHaplotypeMap.keySet()){ - for (Map.Entry currEntry : alleleHaplotypeMap.entrySet()){ - //final Haplotype haplotype = alleleHaplotypeMap.get(allele); - final Allele allele = currEntry.getKey(); - final Haplotype haplotype = currEntry.getValue(); - final byte[] nextHaplotypeBases = haplotype.getBases(); - if (currentHaplotypeBases != null) { - log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases, - readBases, readQuals, readInsQuals, readDelQuals, overallGCP, 
isFirstHaplotype, nextHaplotypeBases); - mLikelihoodArray[idx++] = log10l; - likelihoodMap.add(read, currentAllele, log10l); - } - // update the current haplotype - currentHaplotypeBases = nextHaplotypeBases; - currentAllele = allele; - } - // process the final haplotype - if (currentHaplotypeBases != null) { - - // there is no next haplotype, so pass null for nextHaplotypeBases. - log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases, - readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, null); - likelihoodMap.add(read, currentAllele, log10l); - mLikelihoodArray[idx++] = log10l; - } - } - if(doProfiling) - { - threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime); - //synchronized(doProfiling) - { - pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff; - } - } - return likelihoodMap; - } - /** * Compute the total probability of read arising from haplotypeBases given base substitution, insertion, and deletion * probabilities.