ReadLikelihoods class introduction: final changes before merging
Stories:
https://www.pivotaltracker.com/story/show/70222086
https://www.pivotaltracker.com/story/show/67961652
Changes:
Made some changes that I had missed when ensuring that all PairHMM implementations use the same interface; as a consequence we were always running the standard PairHMM.
Fixed some additional bugs detected when running it on full WGS single-sample and exome multi-sample data sets.
Updated some integration test MD5s.
Fixed GraphBased bugs against the new master code.
Fixed a difficult-to-spot bug in ReadLikelihoods.changeReads.
Changed the PairHMM interface to fix a bug.
Added the missing changes needed for the various PairHMM implementations to use the new structure.
Fixed various bugs only detectable when running with full sample(s).
I believe this fixes the lack of annotations in UG runs.
Fixed integration test MD5s.
Updated some MD5s.
Fixed yet another MD5, probably left out by mistake.
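For reviewers, a minimal sketch of the unified entry point that every PairHMM implementation now shares (type and method names come from this patch; the surrounding setup — samples, haplotypes, readsBySample, processedReads, constantGCP — is illustrative only):

    // Implementations no longer build and return their own PerReadAlleleLikelihoodMap;
    // they fill the destination matrix of an existing ReadLikelihoods collection.
    final ReadLikelihoods<Haplotype> likelihoods =
            new ReadLikelihoods<>(samples, haplotypes, readsBySample); // inputs assumed given
    final ReadLikelihoods.Matrix<Haplotype> matrix = likelihoods.sampleMatrix(0);

    // One gap-continuation-penalty array per processed read (see
    // buildGapContinuationPenalties in the PairHMMLikelihoodCalculationEngine hunk below).
    final Map<GATKSAMRecord, byte[]> gcp =
            buildGapContinuationPenalties(processedReads, constantGCP);

    // void return: each log10 likelihood lands at matrix position [alleleIndex][readIndex].
    pairHMM.computeLikelihoods(matrix, processedReads, gcp);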
This commit is contained in:
parent 0b472f6bff
commit 9a9a68409e
@@ -119,9 +119,12 @@ public class GraphBasedLikelihoodCalculationEngine implements ReadLikelihoodCalc
                 hmm,log10GlobalReadMismappingRate,heterogeneousKmerSizeResolution);
         final List<Haplotype> haplotypes = assemblyResultSet.getHaplotypeList();
         final List<Haplotype> supportedHaplotypes = graphLikelihoodEngine.getHaplotypeList();
-        if (supportedHaplotypes.size() != haplotypes.size())
-        final ReadLikelihoods<Haplotype> result = graphLikelihoodEngine.computeReadLikelihoods(supportedHaplotypes, samples, perSampleReadList);
+        final ReadLikelihoods<Haplotype> result = graphLikelihoodEngine.computeReadLikelihoods(supportedHaplotypes,samples,perSampleReadList);
+        if (supportedHaplotypes.size() != haplotypes.size()) {
+            logger.warn("Some haplotypes were dropped due to a missing route on the graph (supported / all): " + supportedHaplotypes.size() + "/" + haplotypes.size());
+            result.addMissingAlleles(haplotypes,Double.NEGATIVE_INFINITY);
+        }

         if (debugMode != DebugMode.NONE) graphLikelihoodDebugDumps(assemblyResultSet.getRegionForGenotyping(), graphLikelihoodEngine,result);
         return result;
     }
@@ -338,7 +338,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
             if (newList.size() == 0)
                 continue;
             overlappingFilteredReads.put(sample,newList);
-        } return overlappingFilteredReads;
+        }
+        return overlappingFilteredReads;
     }

     /**
@@ -496,7 +497,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC

     @Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"})
     @Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
-    protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
+    protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes) {

         final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1);
         final Event refEvent = new Event(null);
@@ -275,13 +275,24 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
         // Modify the read qualities by applying the PCR error model and capping the minimum base, insertion and deletion qualities
         final List<GATKSAMRecord> processedReads = modifyReadQualities(likelihoods.reads());

+        final Map<GATKSAMRecord,byte[]> gapContinuationPenalties = buildGapContinuationPenalties(processedReads,constantGCP);
         // Run the PairHMM to calculate the log10 likelihood of each (processed) read arising from each haplotype
-        pairHMMThreadLocal.get().computeLikelihoods(likelihoods,processedReads,constantGCP);
+        pairHMMThreadLocal.get().computeLikelihoods(likelihoods,processedReads,gapContinuationPenalties);

         if (WRITE_LIKELIHOODS_TO_FILE)
             writeDebugLikelihoods(likelihoods);
     }

+    private Map<GATKSAMRecord, byte[]> buildGapContinuationPenalties(final List<GATKSAMRecord> processedReads, final byte gcp) {
+        final Map<GATKSAMRecord,byte[]> result = new HashMap<>(processedReads.size());
+        for (final GATKSAMRecord read : processedReads) {
+            final byte[] readGcpArray = new byte[read.getReadLength()];
+            Arrays.fill(readGcpArray,gcp);
+            result.put(read,readGcpArray);
+        }
+        return result;
+    }
+
     private void writeDebugLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods) {
         final List<GATKSAMRecord> reads = likelihoods.reads();
         final List<Haplotype> haplotypes = likelihoods.alleles();
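In effect this hunk widens the PairHMM input from a single constant gap-continuation penalty (GCP) byte to one penalty array per read. A hedged sketch of what buildGapContinuationPenalties produces for each read (variable names illustrative):

    // Every base of the read gets the same constant penalty; implementations that
    // need context-dependent penalties (e.g. PairHMMIndelErrorModel below) can pass
    // a precomputed byte[] per read through the same Map<GATKSAMRecord,byte[]> parameter.
    final byte[] readGcp = new byte[read.getReadLength()];
    Arrays.fill(readGcp, constantGCP);
    gapContinuationPenalties.put(read, readGcp);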
@@ -47,11 +47,13 @@
 package org.broadinstitute.gatk.tools.walkers.indels;

 import com.google.java.contract.Ensures;
+import htsjdk.variant.variantcontext.Allele;
 import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
 import org.broadinstitute.gatk.utils.MathUtils;
 import org.broadinstitute.gatk.utils.clipping.ReadClipper;
 import org.broadinstitute.gatk.utils.exceptions.UserException;
 import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
 import org.broadinstitute.gatk.utils.haplotype.Haplotype;
 import org.broadinstitute.gatk.utils.pairhmm.ArrayLoglessPairHMM;
 import org.broadinstitute.gatk.utils.pairhmm.Log10PairHMM;
@@ -61,12 +63,8 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
 import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
 import org.broadinstitute.gatk.utils.sam.ReadUtils;
-import htsjdk.variant.variantcontext.Allele;

-import java.util.Arrays;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.Map;
+import java.util.*;


 public class PairHMMIndelErrorModel {
@@ -297,10 +295,8 @@ public class PairHMMIndelErrorModel {
                                       final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
         final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()];

-        final LinkedList<GATKSAMRecord> readList = new LinkedList<>();
-        final Map<GATKSAMRecord, byte[]> readGCPArrayMap = new LinkedHashMap<>();
         int readIdx=0;
-        for (PileupElement p: pileup) {
+        for (final PileupElement p: pileup) {

             // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
             if (perReadAlleleLikelihoodMap.containsPileupElement(p)) {
@@ -439,28 +435,31 @@ public class PairHMMIndelErrorModel {

                 // Create a new read based on the current one, but with trimmed bases/quals, for use in the HMM
                 final GATKSAMRecord processedRead = GATKSAMRecord.createQualityModifiedRead(read, readBases, readQuals, baseInsertionQualities, baseDeletionQualities);
-                readList.add(processedRead);

                 // Pack the shortened read and its associated gap-continuation-penalty array into a map, as required by PairHMM
-                readGCPArrayMap.put(processedRead,contextLogGapContinuationProbabilities);
+                final Map<GATKSAMRecord,byte[]> readGCPArrayMap = Collections.singletonMap(processedRead,contextLogGapContinuationProbabilities);

                 // Create a map of alleles to a new set of haplotypes, whose bases have been trimmed to the appropriate genomic locations
                 final Map<Allele, Haplotype> trimmedHaplotypeMap = trimHaplotypes(haplotypeMap, startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, ref);

+                // Apparently more than one allele can map to the same haplotype after trimming
+                final Set<Haplotype> distinctHaplotypesSet = new LinkedHashSet<>(trimmedHaplotypeMap.values());
+                final List<Haplotype> distinctHaplotypesList = Arrays.asList(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
                 // Get the likelihoods for our clipped read against each of our trimmed haplotypes.
-                final PerReadAlleleLikelihoodMap singleReadRawLikelihoods = pairHMM.computeLikelihoods(readList, trimmedHaplotypeMap, readGCPArrayMap);
+                final ReadLikelihoods<Haplotype> rl = new ReadLikelihoods<>(
+                        Collections.singletonList("DUMMY_SAMPLE"),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
+                final ReadLikelihoods.Matrix<Haplotype> dummySampleLikelihoods = rl.sampleMatrix(0);
+                pairHMM.computeLikelihoods(rl.sampleMatrix(0), Collections.singletonList(processedRead), readGCPArrayMap);

                 // Pack the original pileup element, each allele, and each associated log10 likelihood into a final map, and add each likelihood to the array
-                for (Allele a: trimmedHaplotypeMap.keySet()){
-                    double readLikelihood = singleReadRawLikelihoods.getLikelihoodAssociatedWithReadAndAllele(processedRead, a);
-                    perReadAlleleLikelihoodMap.add(p, a, readLikelihood);
+                for (final Allele a: trimmedHaplotypeMap.keySet()){
+                    final Haplotype h = trimmedHaplotypeMap.get(a);
+                    final int hIndex = rl.alleleIndex(h);
+                    final double readLikelihood = dummySampleLikelihoods.get(hIndex,0);
                     readLikelihoods[readIdx][j++] = readLikelihood;
+                    perReadAlleleLikelihoodMap.add(read,a,readLikelihood);
                 }
-                // The readList for sending to the HMM should only ever contain 1 read, as each must be clipped individually
-                readList.remove(processedRead);
-
-                // The same is true for the read/GCP-array map
-                readGCPArrayMap.remove(processedRead);
             }
         }
         readIdx++;
@@ -46,10 +46,10 @@

 package org.broadinstitute.gatk.utils.pairhmm;

-import org.broadinstitute.gatk.utils.haplotype.Haplotype;
 import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
-import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
 import htsjdk.variant.variantcontext.Allele;
+import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
+import org.broadinstitute.gatk.utils.haplotype.Haplotype;
+import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;

 import java.io.File;
 import java.lang.reflect.Field;
@@ -192,47 +192,42 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM {
      * {@inheritDoc}
      */
     @Override
-    public PerReadAlleleLikelihoodMap computeLikelihoods(final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap){
+    public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord,byte[]> gcp){

         // initialize the pairHMM if necessary
         if (! initialized) {
-            int readMaxLength = findMaxReadLength(reads);
-            int haplotypeMaxLength = findMaxHaplotypeLength(alleleHaplotypeMap);
+            int readMaxLength = findMaxReadLength(processedReads);
+            int haplotypeMaxLength = findMaxHaplotypeLength(likelihoods.alleles());
             initialize(readMaxLength, haplotypeMaxLength);
         }

         // Pass the read bases/quals, and the haplotypes as a list into the HMM
-        performBatchAdditions(reads, alleleHaplotypeMap, GCPArrayMap);
+        performBatchAdditions(processedReads, likelihoods.alleles(), gcp);

-        // Get the log10-likelihoods for each read/haplotype and pack into the results map
-        final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
-        collectLikelihoodResults(reads, alleleHaplotypeMap, likelihoodMap);
-
-        return likelihoodMap;
+        // Get the log10-likelihoods for each read/haplotype and pack into the read-likelihoods matrix.
+        collectLikelihoodResults(likelihoods);
     }

-    private void collectLikelihoodResults(List<GATKSAMRecord> reads, Map<Allele, Haplotype> alleleHaplotypeMap, PerReadAlleleLikelihoodMap likelihoodMap) {
-        for(final GATKSAMRecord read : reads){
-            final double[] likelihoods = batchGetResult();
-            int jjj = 0;
-            for (Allele allele : alleleHaplotypeMap.keySet()){
-                final double log10l = likelihoods[jjj];
-                likelihoodMap.add(read, allele, log10l);
-                jjj++;
-            }
+    private void collectLikelihoodResults(ReadLikelihoods.Matrix<Haplotype> likelihoods) {
+        final int numHaplotypes = likelihoods.alleleCount();
+        final int numReads = likelihoods.readCount();
+        for(int r = 0; r < numReads; r++) {
+            final double[] likelihoodValues = batchGetResult();
+
+            for (int h = 0; h < numHaplotypes; h++)
+                likelihoods.set(h,r, likelihoodValues[h]);
         }
     }

-    private void performBatchAdditions(List<GATKSAMRecord> reads, Map<Allele, Haplotype> alleleHaplotypeMap, Map<GATKSAMRecord, byte[]> GCPArrayMap) {
-        final List<Haplotype> haplotypeList = getHaplotypeList(alleleHaplotypeMap);
+    private void performBatchAdditions(final List<GATKSAMRecord> reads, final List<Haplotype> haplotypes, Map<GATKSAMRecord,byte[]> gcp) {
         for(final GATKSAMRecord read : reads){
             final byte[] readBases = read.getReadBases();
             final byte[] readQuals = read.getBaseQualities();
             final byte[] readInsQuals = read.getBaseInsertionQualities();
             final byte[] readDelQuals = read.getBaseDeletionQualities();
-            final byte[] overallGCP = GCPArrayMap.get(read);
+            final byte[] overallGCP = gcp.get(read);

-            batchAdd(haplotypeList, readBases, readQuals, readInsQuals, readDelQuals, overallGCP);
+            batchAdd(haplotypes, readBases, readQuals, readInsQuals, readDelQuals, overallGCP);
         }
     }
@@ -49,23 +49,21 @@ package org.broadinstitute.gatk.utils.pairhmm;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.broadinstitute.gatk.utils.QualityUtils;

+import org.broadinstitute.gatk.utils.exceptions.UserException;
 import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
 import org.broadinstitute.gatk.utils.haplotype.Haplotype;
 import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
 import htsjdk.variant.variantcontext.Allele;
-import org.broadinstitute.gatk.utils.exceptions.UserException;
-import static org.broadinstitute.gatk.utils.pairhmm.PairHMMModel.*;

-import java.util.List;
-import java.util.Map;
-import java.util.HashMap;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
-import java.io.BufferedWriter;
-import java.util.Map;
-import java.util.HashMap;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;

+import static org.broadinstitute.gatk.utils.pairhmm.PairHMMModel.*;


 /**
@@ -156,10 +154,10 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
      * {@inheritDoc}
      */
     @Override
-    public PerReadAlleleLikelihoodMap computeLikelihoods( final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) {
+    public void computeLikelihoods( final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) {
         // (re)initialize the pairHMM only if necessary
-        final int readMaxLength = verify ? findMaxReadLength(reads) : 0;
-        final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0;
+        final int readMaxLength = verify ? findMaxReadLength(processedReads) : 0;
+        final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(likelihoods.alleles()) : 0;
         if(verify)
         {
             if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength)
@@ -167,15 +165,15 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
             if ( ! initialized )
                 throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode");
         }
-        int readListSize = reads.size();
-        int numHaplotypes = alleleHaplotypeMap.size();
+        int readListSize = processedReads.size();
+        int numHaplotypes = likelihoods.alleleCount();
         int numTestcases = readListSize*numHaplotypes;
         if(debug0_1)
             System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
         int idx = 0;
-        for(GATKSAMRecord read : reads)
+        for(final GATKSAMRecord read : processedReads)
         {
-            byte [] overallGCP = GCPArrayMap.get(read);
+            final byte [] overallGCP = GCPArrayMap.get(read);
             if(debug0_1)
                 System.out.println("Java read length "+read.getReadBases().length);
             if(debug3)
@@ -195,9 +193,9 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
         if(verify)
         {
             idx = 0;
-            for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
+            for (final Haplotype h : likelihoods.alleles()) //order is important - access in same order always
             {
-                byte[] haplotypeBases = currEntry.getValue().getBases();
+                byte[] haplotypeBases = h.getBases();
                 if(debug0_1)
                     System.out.println("Java haplotype length "+haplotypeBases.length);
                 if(debug3)
@@ -212,10 +210,10 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
         PerReadAlleleLikelihoodMap likelihoodMap = null;
         if(verify)
         {
-            jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
+            jniPairHMM.computeLikelihoods(likelihoods, processedReads, GCPArrayMap);
             likelihoodArray = jniPairHMM.getLikelihoodArray();
             //to compare values
-            likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
+            super.computeLikelihoods(likelihoods, processedReads, GCPArrayMap);
         }
         else
         {
@@ -234,13 +232,13 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
             idx = 0;
             int idxInsideHaplotypeList = 0;
             int readIdx = 0;
-            for(GATKSAMRecord read : reads)
+            for(final GATKSAMRecord read : processedReads)
             {
                 for(int j=0;j<numHaplotypes;++j)
                     tmpArray[j] = likelihoodArray[readIdx+j];
-                for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
+                for (final Haplotype haplotype : likelihoods.alleles())//order is important - access in same order always
                 {
-                    idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
+                    idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(haplotype);
                     likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
                     ++idx;
                 }
@@ -271,13 +269,13 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
             idx = 0;
             System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
             boolean firstLine = true;
-            for(GATKSAMRecord read : reads)
+            for(final GATKSAMRecord read : processedReads)
             {
                 byte [] overallGCP = GCPArrayMap.get(read);
                 byte[] tmpByteArray = new byte[read.getReadBases().length];
-                for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
+                for (final Haplotype haplotype : likelihoods.alleles()) //order is important - access in same order always
                 {
-                    byte[] haplotypeBases = currEntry.getValue().getBases();
+                    byte[] haplotypeBases = haplotype.getBases();
                     debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true);
                     debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true);
                     for(int k=0;k<read.getReadBases().length;++k)
@@ -303,7 +301,7 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
                         debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
                     else
                         if(dumpSandboxOnly)
-                            likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]);
+                            likelihoods.set(likelihoods.alleleIndex(haplotype),likelihoods.readIndex(read), likelihoodArray[idx]);
                     ++idx;
                 }
             }
@@ -314,7 +312,6 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
         //if(numComputeLikelihoodCalls == 5)
             //jniPairHMM.close();
             //System.exit(0);
-        return likelihoodMap;
     }

     //Used to test parts of the compute kernel separately
@@ -46,26 +46,16 @@

 package org.broadinstitute.gatk.utils.pairhmm;

-import com.google.java.contract.Ensures;
-import com.google.java.contract.Requires;
-import org.broadinstitute.gatk.utils.QualityUtils;

 import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
 import org.broadinstitute.gatk.utils.haplotype.Haplotype;
 import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
 import htsjdk.variant.variantcontext.Allele;

+import java.io.*;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.HashMap;

-//For loading library from jar
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;

 /**
  * Created with IntelliJ IDEA.
@@ -154,7 +144,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
     private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
     //Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize
     //Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype
-    private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<Haplotype,Integer>();
+    private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<>();
     private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray;
     @Override
     public HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; }
@@ -204,22 +194,23 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
      * {@inheritDoc}
      */
     @Override
-    public PerReadAlleleLikelihoodMap computeLikelihoods( final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) {
+    public void computeLikelihoods( final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord,byte[]> gcp ) {
+        if (processedReads.isEmpty())
+            return;
         if(doProfiling)
             startTime = System.nanoTime();
-        int readListSize = reads.size();
-        int numHaplotypes = alleleHaplotypeMap.size();
-        int numTestcases = readListSize*numHaplotypes;
+        int readListSize = processedReads.size();
+        int numHaplotypes = likelihoods.alleleCount();
         JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
         int idx = 0;
-        for(GATKSAMRecord read : reads)
+        for(GATKSAMRecord read : processedReads)
         {
             readDataArray[idx] = new JNIReadDataHolderClass();
             readDataArray[idx].readBases = read.getReadBases();
             readDataArray[idx].readQuals = read.getBaseQualities();
             readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
             readDataArray[idx].deletionGOP = read.getBaseDeletionQualities();
-            readDataArray[idx].overallGCP = GCPArrayMap.get(read);
+            readDataArray[idx].overallGCP = gcp.get(read);
             ++idx;
         }

@@ -230,20 +221,18 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
         // for(haplotypes)
         //     compute_full_prob()
         jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12);

-        final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
-        idx = 0;
-        int idxInsideHaplotypeList = 0;

         int readIdx = 0;
-        for(GATKSAMRecord read : reads)
+        for(int r = 0; r < readListSize; r++)
         {
-            for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
-            {
+            int hapIdx = 0;
+            for (final Haplotype haplotype : likelihoods.alleles()) {

                 //Since the order of haplotypes in the List<Haplotype> and alleleHaplotypeMap is different,
                 //get idx of current haplotype in the list and use this idx to get the right likelihoodValue
-                idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
-                likelihoodMap.add(read, currEntry.getKey(), mLikelihoodArray[readIdx + idxInsideHaplotypeList]);
-                ++idx;
+                final int idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(haplotype);
+                likelihoods.set(hapIdx,r,mLikelihoodArray[readIdx + idxInsideHaplotypeList]);
+                ++hapIdx;
             }
             readIdx += numHaplotypes;
         }
@@ -256,7 +245,6 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
             pairHMMSetupTime += threadLocalSetupTimeDiff;
             }
         }
-        return likelihoodMap;
     }

     /**
@@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe

     @Test(enabled = true)
     public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
-        executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "ae90d6be28c19e455083a47bc95c3b1b");
+        executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "a9730f883a2e01e93d56b11321c9fd52");
     }
 }
@@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe

     @Test(enabled = true)
     public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
-        executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9fc4f04105bde31bafc93548745cb67e");
+        executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","93cf7ed2645599d0ddd319c6989eadcc");
     }

     @Test(enabled = true)
@@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
                 " -o %s" +
                 " -L 1:10,000,000-10,500,000",
                 1,
-                Arrays.asList("8a4de9e1f59cffe80a4372cf02fe809e"));
+                Arrays.asList("24d5f65860507ec7f8d9441bfee8653a"));
         executeTest(String.format("test indel caller in SLX"), spec);
     }

@@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
                 " -o %s" +
                 " -L 1:10,000,000-10,500,000",
                 1,
-                Arrays.asList("2b92df91a9337b9d9f03db5699bb41f2"));
+                Arrays.asList("188dc4d70dc5f4de09f645270cc3f4e1"));

         executeTest(String.format("test indel calling, multiple technologies"), spec);
     }
@@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
                 "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("591c9a7bd7d64974b4a2dd932fdef138"));
+                Arrays.asList("31aaad9adbf12bb7fb12d53080e5c2f5"));
         executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
     }

@@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
                 baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
                 + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
                 "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("e784b6d1b3725f72a2c42b19e8d24cb1"));
+                Arrays.asList("fecb8ae4e8f5ea2b2bf3826d674298f7"));
         executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
     }

@@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
                 "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
-                Arrays.asList("d101beb3d1f71a14a31438cce9786881"));
+                Arrays.asList("0662ec6fe6d6e9807f758b1c82adf056"));
         executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
     }

@@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
     public void testMinIndelFraction0() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 assessMinIndelFraction + " -minIndelFrac 0.0", 1,
-                Arrays.asList("96c2ab043f523c7676e4bc36dd3c9d0b"));
+                Arrays.asList("3d5140ffc02a125ee26c63ec55449192"));
         executeTest("test minIndelFraction 0.0", spec);
     }

@@ -184,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
     public void testMinIndelFraction25() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 assessMinIndelFraction + " -minIndelFrac 0.25", 1,
-                Arrays.asList("53ce3aa9ee16302ab2ec21f742b5e157"));
+                Arrays.asList("7c62eb9142d9124e49c123dcc052ce57"));
         executeTest("test minIndelFraction 0.25", spec);
     }
@@ -53,18 +53,19 @@ import java.util.Arrays;

 public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{

-    private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
+    private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R "
+            + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;

     // --------------------------------------------------------------------------------------------------------------
     //
     // testing normal calling
     //
-    // --------------------------------------------------------------------------------------------------------------
+    // ---------------------------------------------------- ----------------------------------------------------------
     @Test
     public void testMultiSamplePilot1() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
-                Arrays.asList("058b54cff14527485c7b1ab035ebb6c4"));
+                Arrays.asList("c6462121e5a419546f50d53646274fd3"));
         executeTest("test MultiSample Pilot1", spec);
     }

@@ -96,7 +97,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
     public void testMultipleSNPAlleles() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
-                Arrays.asList("120270ddf48dfd461f756c89ea1ab074"));
+                Arrays.asList("85e8ba3cfeef5e3b72418bcfaec11668"));
         executeTest("test Multiple SNP alleles", spec);
     }

@@ -112,7 +113,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
     public void testReverseTrim() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
-                Arrays.asList("bc5a143868e3ad3acc9bb7c09798cdf2"));
+                Arrays.asList("bd867fb341e75a9f1fc78d25bda93f63"));
         executeTest("test reverse trim", spec);
     }

@@ -120,7 +121,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
     public void testMismatchedPLs() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
-                Arrays.asList("02fd6357bd1082115822c8a931cbb6a3"));
+                Arrays.asList("e87419cd42fb9684ee3674ee54abe567"));
         executeTest("test mismatched PLs", spec);
     }
 }
@@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
     @Test
     public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
         HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
-                "7096aa0ba002757cdcb9e1868d51a85f");
+                "1a65d432437670d2816a42ea270c06a1");
     }

     private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
     @Test
     public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
         HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
-                "45be554608485d8d3979284277d00f95");
+                "a5bb8f7eb1f7db997be9fd8928a788f6");
     }

 }
@@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     @Test
     public void testHaplotypeCallerMultiSampleGGA() {
         HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
-                "07c5df4cbd91170d0061355c1a8f7fc9");
+                "4047ea70f37b59b782402f678ea2dfee");
     }

     @Test
@@ -388,6 +388,55 @@ public class ReadLikelihoodsUnitTest
     }


+    @Test(dataProvider = "dataSets")
+    public void testAddMissingAlleles(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
+        final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+        final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
+        final ReadLikelihoods<Allele> result = original.clone();
+
+        // If all the alleles passed are already present in the read-likelihoods collection there is no change.
+        result.addMissingAlleles(result.alleles(),Double.NEGATIVE_INFINITY);
+        testLikelihoodMatrixQueries(samples,result,originalLikelihoods);
+
+        // If the allele list passed is empty there is no effect.
+        result.addMissingAlleles(Collections.EMPTY_LIST,Double.NEGATIVE_INFINITY);
+        testLikelihoodMatrixQueries(samples,result,originalLikelihoods);
+
+        final Allele newOne;
+        final Allele newTwo;
+        final Allele newThree;
+
+        // We add a single missing allele.
+        result.addMissingAlleles(Arrays.asList(newOne = Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-12345.6);
+        Assert.assertEquals(original.alleleCount() + 1, result.alleleCount());
+
+        // We add two more amongst existing alleles:
+        result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.allele(1),
+                result.allele(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
+
+        Assert.assertEquals(original.alleleCount()+3,result.alleleCount());
+
+        final List<Allele> expectedAlleles = new ArrayList<>(original.alleles());
+        expectedAlleles.add(newOne); expectedAlleles.add(newTwo); expectedAlleles.add(newThree);
+
+        Assert.assertEquals(result.alleles(),expectedAlleles);
+
+        final double[][][] newLikelihoods = new double[originalLikelihoods.length][][];
+        for (int s = 0; s < samples.length; s++) {
+            newLikelihoods[s] = Arrays.copyOf(originalLikelihoods[s],originalLikelihoods[s].length + 3);
+            final int sampleReadCount = original.sampleReadCount(s);
+            final int originalAlleleCount = originalLikelihoods[s].length;
+            newLikelihoods[s][originalAlleleCount] = new double[sampleReadCount];
+            Arrays.fill(newLikelihoods[s][originalAlleleCount],-12345.6);
+            newLikelihoods[s][originalAlleleCount+1] = new double[sampleReadCount];
+            Arrays.fill(newLikelihoods[s][originalAlleleCount+1],-6.54321);
+            newLikelihoods[s][originalAlleleCount+2] = new double[sampleReadCount];
+            Arrays.fill(newLikelihoods[s][originalAlleleCount+2],-6.54321);
+        }
+        testLikelihoodMatrixQueries(samples,result,newLikelihoods);
+    }
+
+
     @Test(dataProvider = "dataSets")
     public void testAddNonRefAllele(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
         final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
@@ -480,7 +529,8 @@ public class ReadLikelihoodsUnitTest
             {Allele.create("A",true), Allele.create("T"), Allele.create("C")},
             {Allele.create("A",true)},
             {Allele.create("ATTTA"), Allele.create("A",true)},
-            {Allele.create("A"), Allele.create("AT",true)}
+            {Allele.create("A"), Allele.create("AT",true)},
+            {Allele.create("A",false), Allele.create("AT",false)},
     };

     @DataProvider(name="marginalizationDataSets")
@@ -95,7 +95,7 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
     /**
      * Index of the reference allele if any, otherwise -1.
      */
-    private final int referenceAlleleIndex;
+    private int referenceAlleleIndex;

     /**
      * Caches the read-list per sample list returned by {@link #sampleReads}
@@ -568,12 +568,66 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
                 final GATKSAMRecord replacement = readRealignments.get(read);
                 if (replacement == null)
                     continue;
-                readIndex.put(sampleReads[r] = replacement,r);
+                readIndex.remove(read);
+                readIndex.put(sampleReads[r] = replacement,r);
             }
         }
     }

+    /**
+     * Add alleles that are missing in the read-likelihoods collection giving all reads a default
+     * likelihood value.
+     * @param alleles the potentially missing alleles.
+     * @param defaultLikelihood the default read likelihood value for that allele.
+     *
+     * @throws IllegalArgumentException if {@code alleles} is {@code null} or there is more than
+     * one missing allele that is a reference or there is one but the collection already has
+     * a reference allele.
+     */
+    public void addMissingAlleles(final Collection<A> alleles, final double defaultLikelihood) {
+        if (alleles == null)
+            throw new IllegalArgumentException("the alleles list cannot be null");
+        if (alleles.isEmpty())
+            return;
+        final List<A> allelesToAdd = new ArrayList<A>(alleles.size());
+        for (final A allele : alleles)
+            if (!alleleIndex.containsKey(allele))
+                allelesToAdd.add(allele);
+        if (allelesToAdd.isEmpty())
+            return;
+
+        final int oldAlleleCount = this.alleles.length;
+        final int newAlleleCount = this.alleles.length + allelesToAdd.size();
+
+        alleleList = null;
+        int referenceIndex = this.referenceAlleleIndex;
+        this.alleles = Arrays.copyOf(this.alleles,newAlleleCount);
+        int nextIndex = oldAlleleCount;
+        for (final A allele : allelesToAdd) {
+            if (allele.isReference()) {
+                if (referenceIndex != -1)
+                    throw new IllegalArgumentException("there cannot be more than one reference allele");
+                referenceIndex = nextIndex;
+            }
+            alleleIndex.put(this.alleles[nextIndex] = allele, nextIndex++);
+        }
+
+        if (referenceIndex != -1)
+            referenceAlleleIndex = referenceIndex;
+
+        final int sampleCount = samples.length;
+        for (int s = 0; s < sampleCount; s++) {
+            final int sampleReadCount = readsBySampleIndex[s].length;
+            final double[][] newValuesBySampleIndex = Arrays.copyOf(valuesBySampleIndex[s],newAlleleCount);
+            for (int a = oldAlleleCount; a < newAlleleCount; a++) {
+                newValuesBySampleIndex[a] = new double[sampleReadCount];
+                if (defaultLikelihood != 0.0)
+                    Arrays.fill(newValuesBySampleIndex[a],defaultLikelihood);
+            }
+            valuesBySampleIndex[s] = newValuesBySampleIndex;
+        }
+    }
+
     /**
      * Likelihood matrix between a set of alleles and reads.
      * @param <A> the allele-type.
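The new addMissingAlleles method is what lets the graph-based engine (first hunk of this diff) keep genotyping over the full haplotype list even when some haplotypes have no route through the graph. A minimal usage sketch, assuming the setup objects already exist (names follow the patch):

    // Haplotypes with no likelihood computed get a default of log10(0) = -Infinity,
    // i.e. "no read can arise from this haplotype".
    final ReadLikelihoods<Haplotype> result =
            graphLikelihoodEngine.computeReadLikelihoods(supportedHaplotypes, samples, perSampleReadList);
    result.addMissingAlleles(allHaplotypes, Double.NEGATIVE_INFINITY);
    // result.alleles() now contains every haplotype in allHaplotypes,
    // with the previously computed likelihood values untouched.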
@@ -740,7 +794,7 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
      * or its values contain reference to non-existing alleles in this read-likelihood collection. Also no new allele
      * can have zero old alleles mapping nor two new alleles can make reference to the same old allele.
      */
-    public <B extends Allele> ReadLikelihoods<B> marginalize(final Map<B, List<A>> newToOldAlleleMap,final GenomeLoc overlap) {
+    public <B extends Allele> ReadLikelihoods<B> marginalize(final Map<B, List<A>> newToOldAlleleMap, final GenomeLoc overlap) {

         if (overlap == null)
             return marginalize(newToOldAlleleMap);
@@ -775,7 +829,7 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
             final int newSampleReadCount = sampleReadsToKeep.length;
             if (newSampleReadCount == oldSampleReadCount) {
                 newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(readIndexBySampleIndex[s]);
-                newReadsBySampleIndex[s] = readsBySampleIndex[s].clone();
+                newReadsBySampleIndex[s] = oldSampleReads.clone();
             } else {
                 final Object2IntMap<GATKSAMRecord> newSampleReadIndex = newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(newSampleReadCount);
                 final GATKSAMRecord[] newSampleReads = newReadsBySampleIndex[s] = new GATKSAMRecord[newSampleReadCount];
@@ -26,15 +26,15 @@
 package org.broadinstitute.gatk.utils.pairhmm;

 import com.google.java.contract.Requires;
+import htsjdk.variant.variantcontext.Allele;
 import org.apache.log4j.Logger;
 import org.broadinstitute.gatk.utils.MathUtils;
-import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
 import org.broadinstitute.gatk.utils.haplotype.Haplotype;
 import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
-import htsjdk.variant.variantcontext.Allele;

 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 /**
@@ -158,10 +158,9 @@ public abstract class PairHMM {
         return listMaxReadLength;
     }

-    protected int findMaxHaplotypeLength(final Map<Allele, Haplotype> haplotypeMap) {
+    protected int findMaxHaplotypeLength(final Collection<Haplotype> haplotypes) {
         int listMaxHaplotypeLength = 0;
-        for( final Allele a: haplotypeMap.keySet() ) {
-            final Haplotype h = haplotypeMap.get(a);
+        for( final Haplotype h : haplotypes) {
             final int haplotypeLength = h.getBases().length;
             if( haplotypeLength > listMaxHaplotypeLength ) { listMaxHaplotypeLength = haplotypeLength; }
         }
@@ -172,14 +171,20 @@
     /**
      * Given a list of reads and haplotypes, for every read compute the total probability of said read arising from
      * each haplotype given base substitution, insertion, and deletion probabilities.
      *
-     * @param processedReads reads to analyze
+     * @param processedReads reads to analyze instead of the ones present in the destination read-likelihoods.
      * @param likelihoods where to store the likelihoods where position [a][r] is reserved for the likelihood of {@code reads[r]}
      *                    conditional to {@code alleles[a]}.
-     * @param constantGCP constant penalty for gap continuations.
+     * @param gcp per-read map of gap-continuation penalty arrays for the processed reads.
      *
      * @throws IllegalArgumentException
-     *
-     * @return never {@code null}.
      */
-    public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final byte constantGCP) {
+    public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods,
+                                   final List<GATKSAMRecord> processedReads,
+                                   final Map<GATKSAMRecord,byte[]> gcp) {
+        if (processedReads.isEmpty())
+            return;
         if(doProfiling)
             startTime = System.nanoTime();
         // (re)initialize the pairHMM only if necessary
@@ -195,13 +200,11 @@
         int idx = 0;
         int readIndex = 0;
         for(final GATKSAMRecord read : processedReads){
-            final int readLength = read.getReadLength();
             final byte[] readBases = read.getReadBases();
             final byte[] readQuals = read.getBaseQualities();
             final byte[] readInsQuals = read.getBaseInsertionQualities();
             final byte[] readDelQuals = read.getBaseDeletionQualities();
-            final byte[] overallGCP = new byte[readLength];
-            Arrays.fill(overallGCP,constantGCP);
+            final byte[] overallGCP = gcp.get(read);

             // peek at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation)
             final boolean isFirstHaplotype = true;
@@ -225,78 +228,6 @@
         }
     }

-    /**
-     * Given a list of reads and haplotypes, for every read compute the total probability of said read arising from
-     * each haplotype given base substitution, insertion, and deletion probabilities.
-     *
-     * @param reads the list of reads
-     * @param alleleHaplotypeMap the list of haplotypes
-     * @param GCPArrayMap Each read is associated with an array containing the gap continuation penalties for use in the model. Length of each GCP-array must match that of its read.
-     * @return a PerReadAlleleLikelihoodMap containing each read, haplotype-allele, and the log10 probability of
-     *         said read coming from the said haplotype under the provided error model
-     * @deprecated
-     */
-    public PerReadAlleleLikelihoodMap computeLikelihoods(final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap) {
-        if(doProfiling)
-            startTime = System.nanoTime();
-
-        // (re)initialize the pairHMM only if necessary
-        final int readMaxLength = findMaxReadLength(reads);
-        final int haplotypeMaxLength = findMaxHaplotypeLength(alleleHaplotypeMap);
-        if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) { initialize(readMaxLength, haplotypeMaxLength); }
-
-        final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
-        mLikelihoodArray = new double[reads.size()*alleleHaplotypeMap.size()];
-        int idx = 0;
-        for(GATKSAMRecord read : reads){
-            final byte[] readBases = read.getReadBases();
-            final byte[] readQuals = read.getBaseQualities();
-            final byte[] readInsQuals = read.getBaseInsertionQualities();
-            final byte[] readDelQuals = read.getBaseDeletionQualities();
-            final byte[] overallGCP = GCPArrayMap.get(read);
-
-            // peek at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation)
-            byte[] currentHaplotypeBases = null;
-            final boolean isFirstHaplotype = true;
-            Allele currentAllele = null;
-            double log10l;
-            //for (final Allele allele : alleleHaplotypeMap.keySet()){
-            for (Map.Entry<Allele,Haplotype> currEntry : alleleHaplotypeMap.entrySet()){
-                //final Haplotype haplotype = alleleHaplotypeMap.get(allele);
-                final Allele allele = currEntry.getKey();
-                final Haplotype haplotype = currEntry.getValue();
-                final byte[] nextHaplotypeBases = haplotype.getBases();
-                if (currentHaplotypeBases != null) {
-                    log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases,
-                            readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, nextHaplotypeBases);
-                    mLikelihoodArray[idx++] = log10l;
-                    likelihoodMap.add(read, currentAllele, log10l);
-                }
-                // update the current haplotype
-                currentHaplotypeBases = nextHaplotypeBases;
-                currentAllele = allele;
-            }
-            // process the final haplotype
-            if (currentHaplotypeBases != null) {
-
-                // there is no next haplotype, so pass null for nextHaplotypeBases.
-                log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases,
-                        readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, null);
-                likelihoodMap.add(read, currentAllele, log10l);
-                mLikelihoodArray[idx++] = log10l;
-            }
-        }
-        if(doProfiling)
-        {
-            threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
-            //synchronized(doProfiling)
-            {
-                pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff;
-            }
-        }
-        return likelihoodMap;
-    }
-
     /**
      * Compute the total probability of read arising from haplotypeBases given base substitution, insertion, and deletion
      * probabilities.