ReadLikelihoods class introduction final changes before merging

Stories:

        https://www.pivotaltracker.com/story/show/70222086
        https://www.pivotaltracker.com/story/show/67961652

Changes:

  Done some changes that I missed in relation with making sure that all PairHMM implentations use the same interface; as a consequence we were running always the standard PairHMM.
  Fixed some additional bugs detected when running it on full wgs single sample and exom multi sample data set.
  Updated some integration test md5s.

Fixing GraphBased bugs with new master code
Fixed ReadLikelihoods.changeReads difficult to spot bug.
Changed PairHMM interface to fix a bug
Fixed missing changes for various PairHMM implementations to get them to use the new structure.
Fixed various bugs only detectable when running with full sample(s).
Believe to have fixed the lack of annotations in UG runs
Fixed integrationt test MD5s
Updating some md5s
Fixed yet another md5 probably left out by mistake
This commit is contained in:
Valentin Ruano-Rubio 2014-07-31 15:47:46 -04:00
parent 0b472f6bff
commit 9a9a68409e
16 changed files with 245 additions and 215 deletions

View File

@ -119,9 +119,12 @@ public class GraphBasedLikelihoodCalculationEngine implements ReadLikelihoodCalc
hmm,log10GlobalReadMismappingRate,heterogeneousKmerSizeResolution); hmm,log10GlobalReadMismappingRate,heterogeneousKmerSizeResolution);
final List<Haplotype> haplotypes = assemblyResultSet.getHaplotypeList(); final List<Haplotype> haplotypes = assemblyResultSet.getHaplotypeList();
final List<Haplotype> supportedHaplotypes = graphLikelihoodEngine.getHaplotypeList(); final List<Haplotype> supportedHaplotypes = graphLikelihoodEngine.getHaplotypeList();
if (supportedHaplotypes.size() != haplotypes.size()) final ReadLikelihoods<Haplotype> result = graphLikelihoodEngine.computeReadLikelihoods(supportedHaplotypes, samples, perSampleReadList);
if (supportedHaplotypes.size() != haplotypes.size()) {
logger.warn("Some haplotypes were drop due to missing route on the graph (supported / all): " + supportedHaplotypes.size() + "/" + haplotypes.size()); logger.warn("Some haplotypes were drop due to missing route on the graph (supported / all): " + supportedHaplotypes.size() + "/" + haplotypes.size());
final ReadLikelihoods<Haplotype> result = graphLikelihoodEngine.computeReadLikelihoods(supportedHaplotypes,samples,perSampleReadList); result.addMissingAlleles(haplotypes,Double.NEGATIVE_INFINITY);
}
if (debugMode != DebugMode.NONE) graphLikelihoodDebugDumps(assemblyResultSet.getRegionForGenotyping(), graphLikelihoodEngine,result); if (debugMode != DebugMode.NONE) graphLikelihoodDebugDumps(assemblyResultSet.getRegionForGenotyping(), graphLikelihoodEngine,result);
return result; return result;
} }

View File

@ -338,7 +338,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (newList.size() == 0) if (newList.size() == 0)
continue; continue;
overlappingFilteredReads.put(sample,newList); overlappingFilteredReads.put(sample,newList);
} return overlappingFilteredReads; }
return overlappingFilteredReads;
} }
/** /**
@ -496,7 +497,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
@Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"}) @Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"})
@Ensures({"result.size() == eventsAtThisLoc.size() + 1"}) @Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) { protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes) {
final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1); final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1);
final Event refEvent = new Event(null); final Event refEvent = new Event(null);

View File

@ -275,13 +275,24 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
// Modify the read qualities by applying the PCR error model and capping the minimum base,insertion,deletion qualities // Modify the read qualities by applying the PCR error model and capping the minimum base,insertion,deletion qualities
final List<GATKSAMRecord> processedReads = modifyReadQualities(likelihoods.reads()); final List<GATKSAMRecord> processedReads = modifyReadQualities(likelihoods.reads());
final Map<GATKSAMRecord,byte[]> gapContinuationPenalties = buildGapContinuationPenalties(processedReads,constantGCP);
// Run the PairHMM to calculate the log10 likelihood of each (processed) reads' arising from each haplotype // Run the PairHMM to calculate the log10 likelihood of each (processed) reads' arising from each haplotype
pairHMMThreadLocal.get().computeLikelihoods(likelihoods,processedReads,constantGCP); pairHMMThreadLocal.get().computeLikelihoods(likelihoods,processedReads,gapContinuationPenalties);
if (WRITE_LIKELIHOODS_TO_FILE) if (WRITE_LIKELIHOODS_TO_FILE)
writeDebugLikelihoods(likelihoods); writeDebugLikelihoods(likelihoods);
} }
private Map<GATKSAMRecord, byte[]> buildGapContinuationPenalties(final List<GATKSAMRecord> processedReads, final byte gcp) {
final Map<GATKSAMRecord,byte[]> result = new HashMap<>(processedReads.size());
for (final GATKSAMRecord read : processedReads) {
final byte[] readGcpArray = new byte[read.getReadLength()];
Arrays.fill(readGcpArray,gcp);
result.put(read,readGcpArray);
}
return result;
}
private void writeDebugLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods) { private void writeDebugLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods) {
final List<GATKSAMRecord> reads = likelihoods.reads(); final List<GATKSAMRecord> reads = likelihoods.reads();
final List<Haplotype> haplotypes = likelihoods.alleles(); final List<Haplotype> haplotypes = likelihoods.alleles();

View File

@ -47,11 +47,13 @@
package org.broadinstitute.gatk.tools.walkers.indels; package org.broadinstitute.gatk.tools.walkers.indels;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.clipping.ReadClipper; import org.broadinstitute.gatk.utils.clipping.ReadClipper;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pairhmm.ArrayLoglessPairHMM; import org.broadinstitute.gatk.utils.pairhmm.ArrayLoglessPairHMM;
import org.broadinstitute.gatk.utils.pairhmm.Log10PairHMM; import org.broadinstitute.gatk.utils.pairhmm.Log10PairHMM;
@ -61,12 +63,8 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils; import org.broadinstitute.gatk.utils.sam.ReadUtils;
import htsjdk.variant.variantcontext.Allele;
import java.util.Arrays; import java.util.*;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
public class PairHMMIndelErrorModel { public class PairHMMIndelErrorModel {
@ -297,10 +295,8 @@ public class PairHMMIndelErrorModel {
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) { final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()]; final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()];
final LinkedList<GATKSAMRecord> readList = new LinkedList<>();
final Map<GATKSAMRecord, byte[]> readGCPArrayMap = new LinkedHashMap<>();
int readIdx=0; int readIdx=0;
for (PileupElement p: pileup) { for (final PileupElement p: pileup) {
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
if (perReadAlleleLikelihoodMap.containsPileupElement(p)) { if (perReadAlleleLikelihoodMap.containsPileupElement(p)) {
@ -439,28 +435,31 @@ public class PairHMMIndelErrorModel {
// Create a new read based on the current one, but with trimmed bases/quals, for use in the HMM // Create a new read based on the current one, but with trimmed bases/quals, for use in the HMM
final GATKSAMRecord processedRead = GATKSAMRecord.createQualityModifiedRead(read, readBases, readQuals, baseInsertionQualities, baseDeletionQualities); final GATKSAMRecord processedRead = GATKSAMRecord.createQualityModifiedRead(read, readBases, readQuals, baseInsertionQualities, baseDeletionQualities);
readList.add(processedRead);
// Pack the shortened read and its associated gap-continuation-penalty array into a map, as required by PairHMM // Pack the shortened read and its associated gap-continuation-penalty array into a map, as required by PairHMM
readGCPArrayMap.put(processedRead,contextLogGapContinuationProbabilities); final Map<GATKSAMRecord,byte[]> readGCPArrayMap = Collections.singletonMap(processedRead,contextLogGapContinuationProbabilities);
// Create a map of alleles to a new set of haplotypes, whose bases have been trimmed to the appropriate genomic locations // Create a map of alleles to a new set of haplotypes, whose bases have been trimmed to the appropriate genomic locations
final Map<Allele, Haplotype> trimmedHaplotypeMap = trimHaplotypes(haplotypeMap, startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, ref); final Map<Allele, Haplotype> trimmedHaplotypeMap = trimHaplotypes(haplotypeMap, startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, ref);
// Apparently more than one allele can map to the same haplotype after trimming
final Set<Haplotype> distinctHaplotypesSet = new LinkedHashSet<>(trimmedHaplotypeMap.values());
final List<Haplotype> distinctHaplotypesList = Arrays.asList(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
// Get the likelihoods for our clipped read against each of our trimmed haplotypes. // Get the likelihoods for our clipped read against each of our trimmed haplotypes.
final PerReadAlleleLikelihoodMap singleReadRawLikelihoods = pairHMM.computeLikelihoods(readList, trimmedHaplotypeMap, readGCPArrayMap); final ReadLikelihoods<Haplotype> rl = new ReadLikelihoods<>(
Collections.singletonList("DUMMY_SAMPLE"),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
final ReadLikelihoods.Matrix<Haplotype> dummySampleLikelihoods = rl.sampleMatrix(0);
pairHMM.computeLikelihoods(rl.sampleMatrix(0), Collections.singletonList(processedRead), readGCPArrayMap);
// Pack the original pilup element, each allele, and each associated log10 likelihood into a final map, and add each likelihood to the array // Pack the original pilup element, each allele, and each associated log10 likelihood into a final map, and add each likelihood to the array
for (Allele a: trimmedHaplotypeMap.keySet()){ for (final Allele a: trimmedHaplotypeMap.keySet()){
double readLikelihood = singleReadRawLikelihoods.getLikelihoodAssociatedWithReadAndAllele(processedRead, a); final Haplotype h = trimmedHaplotypeMap.get(a);
perReadAlleleLikelihoodMap.add(p, a, readLikelihood); final int hIndex = rl.alleleIndex(h);
final double readLikelihood = dummySampleLikelihoods.get(hIndex,0);
readLikelihoods[readIdx][j++] = readLikelihood; readLikelihoods[readIdx][j++] = readLikelihood;
perReadAlleleLikelihoodMap.add(read,a,readLikelihood);
} }
// The readList for sending to the HMM should only ever contain 1 read, as each must be clipped individually
readList.remove(processedRead);
// The same is true for the read/GCP-array map
readGCPArrayMap.remove(processedRead);
} }
} }
readIdx++; readIdx++;

View File

@ -46,10 +46,10 @@
package org.broadinstitute.gatk.utils.pairhmm; package org.broadinstitute.gatk.utils.pairhmm;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import java.io.File; import java.io.File;
import java.lang.reflect.Field; import java.lang.reflect.Field;
@ -192,47 +192,42 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM {
* {@inheritDoc} * {@inheritDoc}
*/ */
@Override @Override
public PerReadAlleleLikelihoodMap computeLikelihoods(final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap){ public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord,byte[]> gcp){
// initialize the pairHMM if necessary // initialize the pairHMM if necessary
if (! initialized) { if (! initialized) {
int readMaxLength = findMaxReadLength(reads); int readMaxLength = findMaxReadLength(processedReads);
int haplotypeMaxLength = findMaxHaplotypeLength(alleleHaplotypeMap); int haplotypeMaxLength = findMaxHaplotypeLength(likelihoods.alleles());
initialize(readMaxLength, haplotypeMaxLength); initialize(readMaxLength, haplotypeMaxLength);
} }
// Pass the read bases/quals, and the haplotypes as a list into the HMM // Pass the read bases/quals, and the haplotypes as a list into the HMM
performBatchAdditions(reads, alleleHaplotypeMap, GCPArrayMap); performBatchAdditions(processedReads, likelihoods.alleles(), gcp);
// Get the log10-likelihoods for each read/haplotype ant pack into the results map // Get the log10-likelihoods for each read/haplotype ant pack into the read-likelihoods matrix.
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); collectLikelihoodResults(likelihoods);
collectLikelihoodResults(reads, alleleHaplotypeMap, likelihoodMap);
return likelihoodMap;
} }
private void collectLikelihoodResults(List<GATKSAMRecord> reads, Map<Allele, Haplotype> alleleHaplotypeMap, PerReadAlleleLikelihoodMap likelihoodMap) { private void collectLikelihoodResults(ReadLikelihoods.Matrix<Haplotype> likelihoods) {
for(final GATKSAMRecord read : reads){ final int numHaplotypes = likelihoods.alleleCount();
final double[] likelihoods = batchGetResult(); final int numReads = likelihoods.readCount();
int jjj = 0; for(int r = 0; r < numReads; r++) {
for (Allele allele : alleleHaplotypeMap.keySet()){ final double[] likelihoodValues = batchGetResult();
final double log10l = likelihoods[jjj];
likelihoodMap.add(read, allele, log10l); for (int h = 0; h < numHaplotypes; h++)
jjj++; likelihoods.set(h,r, likelihoodValues[h]);
}
} }
} }
private void performBatchAdditions(List<GATKSAMRecord> reads, Map<Allele, Haplotype> alleleHaplotypeMap, Map<GATKSAMRecord, byte[]> GCPArrayMap) { private void performBatchAdditions(final List<GATKSAMRecord> reads, final List<Haplotype> haplotypes, Map<GATKSAMRecord,byte[]> gcp) {
final List<Haplotype> haplotypeList = getHaplotypeList(alleleHaplotypeMap);
for(final GATKSAMRecord read : reads){ for(final GATKSAMRecord read : reads){
final byte[] readBases = read.getReadBases(); final byte[] readBases = read.getReadBases();
final byte[] readQuals = read.getBaseQualities(); final byte[] readQuals = read.getBaseQualities();
final byte[] readInsQuals = read.getBaseInsertionQualities(); final byte[] readInsQuals = read.getBaseInsertionQualities();
final byte[] readDelQuals = read.getBaseDeletionQualities(); final byte[] readDelQuals = read.getBaseDeletionQualities();
final byte[] overallGCP = GCPArrayMap.get(read); final byte[] overallGCP = gcp.get(read);
batchAdd(haplotypeList, readBases, readQuals, readInsQuals, readDelQuals, overallGCP); batchAdd(haplotypes, readBases, readQuals, readInsQuals, readDelQuals, overallGCP);
} }
} }

View File

@ -49,23 +49,21 @@ package org.broadinstitute.gatk.utils.pairhmm;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.broadinstitute.gatk.utils.QualityUtils; import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import static org.broadinstitute.gatk.utils.pairhmm.PairHMMModel.*;
import java.util.List; import java.io.BufferedWriter;
import java.util.Map;
import java.util.HashMap;
import java.io.File; import java.io.File;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.BufferedWriter;
import java.util.Map;
import java.util.HashMap;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.broadinstitute.gatk.utils.pairhmm.PairHMMModel.*;
/** /**
@ -156,10 +154,10 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
* {@inheritDoc} * {@inheritDoc}
*/ */
@Override @Override
public PerReadAlleleLikelihoodMap computeLikelihoods( final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) { public void computeLikelihoods( final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) {
// (re)initialize the pairHMM only if necessary // (re)initialize the pairHMM only if necessary
final int readMaxLength = verify ? findMaxReadLength(reads) : 0; final int readMaxLength = verify ? findMaxReadLength(processedReads) : 0;
final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0; final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(likelihoods.alleles()) : 0;
if(verify) if(verify)
{ {
if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength)
@ -167,15 +165,15 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
if ( ! initialized ) if ( ! initialized )
throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode"); throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode");
} }
int readListSize = reads.size(); int readListSize = processedReads.size();
int numHaplotypes = alleleHaplotypeMap.size(); int numHaplotypes = likelihoods.alleleCount();
int numTestcases = readListSize*numHaplotypes; int numTestcases = readListSize*numHaplotypes;
if(debug0_1) if(debug0_1)
System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
int idx = 0; int idx = 0;
for(GATKSAMRecord read : reads) for(final GATKSAMRecord read : processedReads)
{ {
byte [] overallGCP = GCPArrayMap.get(read); final byte [] overallGCP = GCPArrayMap.get(read);
if(debug0_1) if(debug0_1)
System.out.println("Java read length "+read.getReadBases().length); System.out.println("Java read length "+read.getReadBases().length);
if(debug3) if(debug3)
@ -195,9 +193,9 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
if(verify) if(verify)
{ {
idx = 0; idx = 0;
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always for (final Haplotype h : likelihoods.alleles()) //order is important - access in same order always
{ {
byte[] haplotypeBases = currEntry.getValue().getBases(); byte[] haplotypeBases = h.getBases();
if(debug0_1) if(debug0_1)
System.out.println("Java haplotype length "+haplotypeBases.length); System.out.println("Java haplotype length "+haplotypeBases.length);
if(debug3) if(debug3)
@ -212,10 +210,10 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
PerReadAlleleLikelihoodMap likelihoodMap = null; PerReadAlleleLikelihoodMap likelihoodMap = null;
if(verify) if(verify)
{ {
jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap); jniPairHMM.computeLikelihoods(likelihoods, processedReads, GCPArrayMap);
likelihoodArray = jniPairHMM.getLikelihoodArray(); likelihoodArray = jniPairHMM.getLikelihoodArray();
//to compare values //to compare values
likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap); super.computeLikelihoods(likelihoods, processedReads, GCPArrayMap);
} }
else else
{ {
@ -234,13 +232,13 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
idx = 0; idx = 0;
int idxInsideHaplotypeList = 0; int idxInsideHaplotypeList = 0;
int readIdx = 0; int readIdx = 0;
for(GATKSAMRecord read : reads) for(final GATKSAMRecord read : processedReads)
{ {
for(int j=0;j<numHaplotypes;++j) for(int j=0;j<numHaplotypes;++j)
tmpArray[j] = likelihoodArray[readIdx+j]; tmpArray[j] = likelihoodArray[readIdx+j];
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always for (final Haplotype haplotype : likelihoods.alleles())//order is important - access in same order always
{ {
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(haplotype);
likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList]; likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
++idx; ++idx;
} }
@ -271,13 +269,13 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
idx = 0; idx = 0;
System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
boolean firstLine = true; boolean firstLine = true;
for(GATKSAMRecord read : reads) for(final GATKSAMRecord read : processedReads)
{ {
byte [] overallGCP = GCPArrayMap.get(read); byte [] overallGCP = GCPArrayMap.get(read);
byte[] tmpByteArray = new byte[read.getReadBases().length]; byte[] tmpByteArray = new byte[read.getReadBases().length];
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always for (final Haplotype haplotype : likelihoods.alleles()) //order is important - access in same order always
{ {
byte[] haplotypeBases = currEntry.getValue().getBases(); byte[] haplotypeBases = haplotype.getBases();
debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true); debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true);
debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true); debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true);
for(int k=0;k<read.getReadBases().length;++k) for(int k=0;k<read.getReadBases().length;++k)
@ -303,7 +301,7 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true); debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
else else
if(dumpSandboxOnly) if(dumpSandboxOnly)
likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]); likelihoods.set(likelihoods.alleleIndex(haplotype),likelihoods.readIndex(read), likelihoodArray[idx]);
++idx; ++idx;
} }
} }
@ -314,7 +312,6 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
//if(numComputeLikelihoodCalls == 5) //if(numComputeLikelihoodCalls == 5)
//jniPairHMM.close(); //jniPairHMM.close();
//System.exit(0); //System.exit(0);
return likelihoodMap;
} }
//Used to test parts of the compute kernel separately //Used to test parts of the compute kernel separately

View File

@ -46,26 +46,16 @@
package org.broadinstitute.gatk.utils.pairhmm; package org.broadinstitute.gatk.utils.pairhmm;
import com.google.java.contract.Ensures; import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import com.google.java.contract.Requires;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import java.io.*;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.HashMap;
//For loading library from jar //For loading library from jar
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/** /**
* Created with IntelliJ IDEA. * Created with IntelliJ IDEA.
@ -154,7 +144,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray); private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
//Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize //Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize
//Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype //Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype
private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<Haplotype,Integer>(); private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<>();
private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray; private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray;
@Override @Override
public HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; } public HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; }
@ -204,22 +194,23 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
* {@inheritDoc} * {@inheritDoc}
*/ */
@Override @Override
public PerReadAlleleLikelihoodMap computeLikelihoods( final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) { public void computeLikelihoods( final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord,byte[]> gcp ) {
if (processedReads.isEmpty())
return;
if(doProfiling) if(doProfiling)
startTime = System.nanoTime(); startTime = System.nanoTime();
int readListSize = reads.size(); int readListSize = processedReads.size();
int numHaplotypes = alleleHaplotypeMap.size(); int numHaplotypes = likelihoods.alleleCount();
int numTestcases = readListSize*numHaplotypes;
JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize]; JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
int idx = 0; int idx = 0;
for(GATKSAMRecord read : reads) for(GATKSAMRecord read : processedReads)
{ {
readDataArray[idx] = new JNIReadDataHolderClass(); readDataArray[idx] = new JNIReadDataHolderClass();
readDataArray[idx].readBases = read.getReadBases(); readDataArray[idx].readBases = read.getReadBases();
readDataArray[idx].readQuals = read.getBaseQualities(); readDataArray[idx].readQuals = read.getBaseQualities();
readDataArray[idx].insertionGOP = read.getBaseInsertionQualities(); readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
readDataArray[idx].deletionGOP = read.getBaseDeletionQualities(); readDataArray[idx].deletionGOP = read.getBaseDeletionQualities();
readDataArray[idx].overallGCP = GCPArrayMap.get(read); readDataArray[idx].overallGCP = gcp.get(read);
++idx; ++idx;
} }
@ -231,19 +222,17 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
// compute_full_prob() // compute_full_prob()
jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12); jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12);
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
idx = 0;
int idxInsideHaplotypeList = 0;
int readIdx = 0; int readIdx = 0;
for(GATKSAMRecord read : reads) for(int r = 0; r < readListSize; r++)
{ {
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always int hapIdx = 0;
{ for (final Haplotype haplotype : likelihoods.alleles()) {
//Since the order of haplotypes in the List<Haplotype> and alleleHaplotypeMap is different, //Since the order of haplotypes in the List<Haplotype> and alleleHaplotypeMap is different,
//get idx of current haplotype in the list and use this idx to get the right likelihoodValue //get idx of current haplotype in the list and use this idx to get the right likelihoodValue
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); final int idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(haplotype);
likelihoodMap.add(read, currEntry.getKey(), mLikelihoodArray[readIdx + idxInsideHaplotypeList]); likelihoods.set(hapIdx,r,mLikelihoodArray[readIdx + idxInsideHaplotypeList]);
++idx; ++hapIdx;
} }
readIdx += numHaplotypes; readIdx += numHaplotypes;
} }
@ -256,7 +245,6 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
pairHMMSetupTime += threadLocalSetupTimeDiff; pairHMMSetupTime += threadLocalSetupTimeDiff;
} }
} }
return likelihoodMap;
} }
/** /**

View File

@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true) @Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "ae90d6be28c19e455083a47bc95c3b1b"); executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "a9730f883a2e01e93d56b11321c9fd52");
} }
} }

View File

@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true) @Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9fc4f04105bde31bafc93548745cb67e"); executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","93cf7ed2645599d0ddd319c6989eadcc");
} }
@Test(enabled = true) @Test(enabled = true)

View File

@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" + " -o %s" +
" -L 1:10,000,000-10,500,000", " -L 1:10,000,000-10,500,000",
1, 1,
Arrays.asList("8a4de9e1f59cffe80a4372cf02fe809e")); Arrays.asList("24d5f65860507ec7f8d9441bfee8653a"));
executeTest(String.format("test indel caller in SLX"), spec); executeTest(String.format("test indel caller in SLX"), spec);
} }
@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" + " -o %s" +
" -L 1:10,000,000-10,500,000", " -L 1:10,000,000-10,500,000",
1, 1,
Arrays.asList("2b92df91a9337b9d9f03db5699bb41f2")); Arrays.asList("188dc4d70dc5f4de09f645270cc3f4e1"));
executeTest(String.format("test indel calling, multiple technologies"), spec); executeTest(String.format("test indel calling, multiple technologies"), spec);
} }
@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("591c9a7bd7d64974b4a2dd932fdef138")); Arrays.asList("31aaad9adbf12bb7fb12d53080e5c2f5"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
} }
@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("e784b6d1b3725f72a2c42b19e8d24cb1")); Arrays.asList("fecb8ae4e8f5ea2b2bf3826d674298f7"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
} }
@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
Arrays.asList("d101beb3d1f71a14a31438cce9786881")); Arrays.asList("0662ec6fe6d6e9807f758b1c82adf056"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
} }
@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() { public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1, assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("96c2ab043f523c7676e4bc36dd3c9d0b")); Arrays.asList("3d5140ffc02a125ee26c63ec55449192"));
executeTest("test minIndelFraction 0.0", spec); executeTest("test minIndelFraction 0.0", spec);
} }
@ -184,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() { public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1, assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("53ce3aa9ee16302ab2ec21f742b5e157")); Arrays.asList("7c62eb9142d9124e49c123dcc052ce57"));
executeTest("test minIndelFraction 0.25", spec); executeTest("test minIndelFraction 0.25", spec);
} }

View File

@ -53,18 +53,19 @@ import java.util.Arrays;
public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R "
+ b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// testing normal calling // testing normal calling
// //
// -------------------------------------------------------------------------------------------------------------- // ---------------------------------------------------- ----------------------------------------------------------
@Test @Test
public void testMultiSamplePilot1() { public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("058b54cff14527485c7b1ab035ebb6c4")); Arrays.asList("c6462121e5a419546f50d53646274fd3"));
executeTest("test MultiSample Pilot1", spec); executeTest("test MultiSample Pilot1", spec);
} }
@ -96,7 +97,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() { public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("120270ddf48dfd461f756c89ea1ab074")); Arrays.asList("85e8ba3cfeef5e3b72418bcfaec11668"));
executeTest("test Multiple SNP alleles", spec); executeTest("test Multiple SNP alleles", spec);
} }
@ -112,7 +113,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testReverseTrim() { public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("bc5a143868e3ad3acc9bb7c09798cdf2")); Arrays.asList("bd867fb341e75a9f1fc78d25bda93f63"));
executeTest("test reverse trim", spec); executeTest("test reverse trim", spec);
} }
@ -120,7 +121,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMismatchedPLs() { public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("02fd6357bd1082115822c8a931cbb6a3")); Arrays.asList("e87419cd42fb9684ee3674ee54abe567"));
executeTest("test mismatched PLs", spec); executeTest("test mismatched PLs", spec);
} }
} }

View File

@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"7096aa0ba002757cdcb9e1868d51a85f"); "1a65d432437670d2816a42ea270c06a1");
} }
private void HCTestComplexConsensusMode(String bam, String args, String md5) { private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() { public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"45be554608485d8d3979284277d00f95"); "a5bb8f7eb1f7db997be9fd8928a788f6");
} }
} }

View File

@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSampleGGA() { public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"07c5df4cbd91170d0061355c1a8f7fc9"); "4047ea70f37b59b782402f678ea2dfee");
} }
@Test @Test

View File

@ -388,6 +388,55 @@ public class ReadLikelihoodsUnitTest
} }
@Test(dataProvider = "dataSets")
public void testAddMissingAlleles(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.clone();
// If all the alleles pass are present in the read-likelihoods collection there is no change.
result.addMissingAlleles(result.alleles(),Double.NEGATIVE_INFINITY);
testLikelihoodMatrixQueries(samples,result,originalLikelihoods);
// If the allele list passed is empty there is no effect.
result.addMissingAlleles(Collections.EMPTY_LIST,Double.NEGATIVE_INFINITY);
testLikelihoodMatrixQueries(samples,result,originalLikelihoods);
final Allele newOne;
final Allele newTwo;
final Allele newThree;
// We add a single missing.
result.addMissingAlleles(Arrays.asList(newOne = Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-12345.6);
Assert.assertEquals(original.alleleCount() + 1, result.alleleCount());
// We add too more amongst exisisting alleles:
result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.allele(1),
result.allele(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
Assert.assertEquals(original.alleleCount()+3,result.alleleCount());
final List<Allele> expectedAlleles = new ArrayList<>(original.alleles());
expectedAlleles.add(newOne); expectedAlleles.add(newTwo); expectedAlleles.add(newThree);
Assert.assertEquals(result.alleles(),expectedAlleles);
final double[][][] newLikelihoods = new double[originalLikelihoods.length][][];
for (int s = 0; s < samples.length; s++) {
newLikelihoods[s] = Arrays.copyOf(originalLikelihoods[s],originalLikelihoods[s].length + 3);
final int sampleReadCount = original.sampleReadCount(s);
final int originalAlleleCount = originalLikelihoods[s].length;
newLikelihoods[s][originalAlleleCount] = new double[sampleReadCount];
Arrays.fill(newLikelihoods[s][originalAlleleCount],-12345.6);
newLikelihoods[s][originalAlleleCount+1] = new double[sampleReadCount];
Arrays.fill(newLikelihoods[s][originalAlleleCount+1],-6.54321);
newLikelihoods[s][originalAlleleCount+2] = new double[sampleReadCount];
Arrays.fill(newLikelihoods[s][originalAlleleCount+2],-6.54321);
}
testLikelihoodMatrixQueries(samples,result,newLikelihoods);
}
@Test(dataProvider = "dataSets") @Test(dataProvider = "dataSets")
public void testAddNonRefAllele(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) { public void testAddNonRefAllele(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads); final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
@ -480,7 +529,8 @@ public class ReadLikelihoodsUnitTest
{Allele.create("A",true), Allele.create("T"), Allele.create("C")}, {Allele.create("A",true), Allele.create("T"), Allele.create("C")},
{Allele.create("A",true)}, {Allele.create("A",true)},
{Allele.create("ATTTA"), Allele.create("A",true)}, {Allele.create("ATTTA"), Allele.create("A",true)},
{Allele.create("A"), Allele.create("AT",true)} {Allele.create("A"), Allele.create("AT",true)},
{Allele.create("A",false), Allele.create("AT",false)},
}; };
@DataProvider(name="marginalizationDataSets") @DataProvider(name="marginalizationDataSets")

View File

@ -95,7 +95,7 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
/** /**
* Index of the reference allele if any, otherwise -1. * Index of the reference allele if any, otherwise -1.
*/ */
private final int referenceAlleleIndex; private int referenceAlleleIndex;
/** /**
* Caches the read-list per sample list returned by {@link #sampleReads} * Caches the read-list per sample list returned by {@link #sampleReads}
@ -568,12 +568,66 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
final GATKSAMRecord replacement = readRealignments.get(read); final GATKSAMRecord replacement = readRealignments.get(read);
if (replacement == null) if (replacement == null)
continue; continue;
readIndex.put(sampleReads[r] = replacement,r);
readIndex.remove(read); readIndex.remove(read);
readIndex.put(sampleReads[r] = replacement,r);
} }
} }
} }
/**
* Add alleles that are missing in the read-likelihoods collection giving all reads a default
* likelihood value.
* @param alleles the potentially missing alleles.
* @param defaultLikelihood the default read likelihood value for that allele.
*
* @throws IllegalArgumentException if {@code alleles} is {@code null} or there is more than
* one missing allele that is a reference or there is one but the collection already has
* a reference allele.
*/
public void addMissingAlleles(final Collection<A> alleles, final double defaultLikelihood) {
if (alleles == null)
throw new IllegalArgumentException("the alleles list cannot be null");
if (alleles.isEmpty())
return;
final List<A> allelesToAdd = new ArrayList<A>(alleles.size());
for (final A allele : alleles)
if (!alleleIndex.containsKey(allele))
allelesToAdd.add(allele);
if (allelesToAdd.isEmpty())
return;
final int oldAlleleCount = this.alleles.length;
final int newAlleleCount = this.alleles.length + allelesToAdd.size();
alleleList = null;
int referenceIndex = this.referenceAlleleIndex;
this.alleles = Arrays.copyOf(this.alleles,newAlleleCount);
int nextIndex = oldAlleleCount;
for (final A allele : allelesToAdd) {
if (allele.isReference()) {
if (referenceIndex != -1)
throw new IllegalArgumentException("there cannot be more than one reference allele");
referenceIndex = nextIndex;
}
alleleIndex.put(this.alleles[nextIndex] = allele, nextIndex++);
}
if (referenceIndex != -1)
referenceAlleleIndex = referenceIndex;
final int sampleCount = samples.length;
for (int s = 0; s < sampleCount; s++) {
final int sampleReadCount = readsBySampleIndex[s].length;
final double[][] newValuesBySampleIndex = Arrays.copyOf(valuesBySampleIndex[s],newAlleleCount);
for (int a = oldAlleleCount; a < newAlleleCount; a++) {
newValuesBySampleIndex[a] = new double[sampleReadCount];
if (defaultLikelihood != 0.0)
Arrays.fill(newValuesBySampleIndex[a],defaultLikelihood);
}
valuesBySampleIndex[s] = newValuesBySampleIndex;
}
}
/** /**
* Likelihood matrix between a set of alleles and reads. * Likelihood matrix between a set of alleles and reads.
* @param <A> the allele-type. * @param <A> the allele-type.
@ -740,7 +794,7 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
* or its values contain reference to non-existing alleles in this read-likelihood collection. Also no new allele * or its values contain reference to non-existing alleles in this read-likelihood collection. Also no new allele
* can have zero old alleles mapping nor two new alleles can make reference to the same old allele. * can have zero old alleles mapping nor two new alleles can make reference to the same old allele.
*/ */
public <B extends Allele> ReadLikelihoods<B> marginalize(final Map<B, List<A>> newToOldAlleleMap,final GenomeLoc overlap) { public <B extends Allele> ReadLikelihoods<B> marginalize(final Map<B, List<A>> newToOldAlleleMap, final GenomeLoc overlap) {
if (overlap == null) if (overlap == null)
return marginalize(newToOldAlleleMap); return marginalize(newToOldAlleleMap);
@ -775,7 +829,7 @@ public class ReadLikelihoods<A extends Allele> implements Cloneable {
final int newSampleReadCount = sampleReadsToKeep.length; final int newSampleReadCount = sampleReadsToKeep.length;
if (newSampleReadCount == oldSampleReadCount) { if (newSampleReadCount == oldSampleReadCount) {
newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(readIndexBySampleIndex[s]); newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(readIndexBySampleIndex[s]);
newReadsBySampleIndex[s] = readsBySampleIndex[s].clone(); newReadsBySampleIndex[s] = oldSampleReads.clone();
} else { } else {
final Object2IntMap<GATKSAMRecord> newSampleReadIndex = newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(newSampleReadCount); final Object2IntMap<GATKSAMRecord> newSampleReadIndex = newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(newSampleReadCount);
final GATKSAMRecord[] newSampleReads = newReadsBySampleIndex[s] = new GATKSAMRecord[newSampleReadCount]; final GATKSAMRecord[] newSampleReads = newReadsBySampleIndex[s] = new GATKSAMRecord[newSampleReadCount];

View File

@ -26,15 +26,15 @@
package org.broadinstitute.gatk.utils.pairhmm; package org.broadinstitute.gatk.utils.pairhmm;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
@ -158,10 +158,9 @@ public abstract class PairHMM {
return listMaxReadLength; return listMaxReadLength;
} }
protected int findMaxHaplotypeLength(final Map<Allele, Haplotype> haplotypeMap) { protected int findMaxHaplotypeLength(final Collection<Haplotype> haplotypes) {
int listMaxHaplotypeLength = 0; int listMaxHaplotypeLength = 0;
for( final Allele a: haplotypeMap.keySet() ) { for( final Haplotype h : haplotypes) {
final Haplotype h = haplotypeMap.get(a);
final int haplotypeLength = h.getBases().length; final int haplotypeLength = h.getBases().length;
if( haplotypeLength > listMaxHaplotypeLength ) { listMaxHaplotypeLength = haplotypeLength; } if( haplotypeLength > listMaxHaplotypeLength ) { listMaxHaplotypeLength = haplotypeLength; }
} }
@ -172,14 +171,20 @@ public abstract class PairHMM {
* Given a list of reads and haplotypes, for every read compute the total probability of said read arising from * Given a list of reads and haplotypes, for every read compute the total probability of said read arising from
* each haplotype given base substitution, insertion, and deletion probabilities. * each haplotype given base substitution, insertion, and deletion probabilities.
* *
* @param processedReads reads to analyze * @param processedReads reads to analyze instead of the ones present in the destination read-likelihoods.
* @param likelihoods where to store the likelihoods where position [a][r] is reserved for the likelihood of {@code reads[r]} * @param likelihoods where to store the likelihoods where position [a][r] is reserved for the likelihood of {@code reads[r]}
* conditional to {@code alleles[a]}. * conditional to {@code alleles[a]}.
* @param constantGCP constant penalty for gap continuations. * @param gcp penalty for gap continuations base array map for processed reads.
*
* @throws IllegalArgumentException
* *
* @return never {@code null}. * @return never {@code null}.
*/ */
public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final byte constantGCP) { public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods,
final List<GATKSAMRecord> processedReads,
final Map<GATKSAMRecord,byte[]> gcp) {
if (processedReads.isEmpty())
return;
if(doProfiling) if(doProfiling)
startTime = System.nanoTime(); startTime = System.nanoTime();
// (re)initialize the pairHMM only if necessary // (re)initialize the pairHMM only if necessary
@ -195,13 +200,11 @@ public abstract class PairHMM {
int idx = 0; int idx = 0;
int readIndex = 0; int readIndex = 0;
for(final GATKSAMRecord read : processedReads){ for(final GATKSAMRecord read : processedReads){
final int readLength = read.getReadLength();
final byte[] readBases = read.getReadBases(); final byte[] readBases = read.getReadBases();
final byte[] readQuals = read.getBaseQualities(); final byte[] readQuals = read.getBaseQualities();
final byte[] readInsQuals = read.getBaseInsertionQualities(); final byte[] readInsQuals = read.getBaseInsertionQualities();
final byte[] readDelQuals = read.getBaseDeletionQualities(); final byte[] readDelQuals = read.getBaseDeletionQualities();
final byte[] overallGCP = new byte[readLength]; final byte[] overallGCP = gcp.get(read);
Arrays.fill(overallGCP,constantGCP);
// peak at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation) // peak at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation)
final boolean isFirstHaplotype = true; final boolean isFirstHaplotype = true;
@ -225,78 +228,6 @@ public abstract class PairHMM {
} }
} }
/**
* Given a list of reads and haplotypes, for every read compute the total probability of said read arising from
* each haplotype given base substitution, insertion, and deletion probabilities.
*
* @param reads the list of reads
* @param alleleHaplotypeMap the list of haplotypes
* @param GCPArrayMap Each read is associated with an array containing the gap continuation penalties for use in the model. Length of each GCP-array must match that of its read.
* @return a PerReadAlleleLikelihoodMap containing each read, haplotype-allele, and the log10 probability of
* said read coming from the said haplotype under the provided error model
* @deprecated
*/
public PerReadAlleleLikelihoodMap computeLikelihoods(final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap) {
if(doProfiling)
startTime = System.nanoTime();
// (re)initialize the pairHMM only if necessary
final int readMaxLength = findMaxReadLength(reads);
final int haplotypeMaxLength = findMaxHaplotypeLength(alleleHaplotypeMap);
if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) { initialize(readMaxLength, haplotypeMaxLength); }
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
mLikelihoodArray = new double[reads.size()*alleleHaplotypeMap.size()];
int idx = 0;
for(GATKSAMRecord read : reads){
final byte[] readBases = read.getReadBases();
final byte[] readQuals = read.getBaseQualities();
final byte[] readInsQuals = read.getBaseInsertionQualities();
final byte[] readDelQuals = read.getBaseDeletionQualities();
final byte[] overallGCP = GCPArrayMap.get(read);
// peak at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation)
byte[] currentHaplotypeBases = null;
final boolean isFirstHaplotype = true;
Allele currentAllele = null;
double log10l;
//for (final Allele allele : alleleHaplotypeMap.keySet()){
for (Map.Entry<Allele,Haplotype> currEntry : alleleHaplotypeMap.entrySet()){
//final Haplotype haplotype = alleleHaplotypeMap.get(allele);
final Allele allele = currEntry.getKey();
final Haplotype haplotype = currEntry.getValue();
final byte[] nextHaplotypeBases = haplotype.getBases();
if (currentHaplotypeBases != null) {
log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases,
readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, nextHaplotypeBases);
mLikelihoodArray[idx++] = log10l;
likelihoodMap.add(read, currentAllele, log10l);
}
// update the current haplotype
currentHaplotypeBases = nextHaplotypeBases;
currentAllele = allele;
}
// process the final haplotype
if (currentHaplotypeBases != null) {
// there is no next haplotype, so pass null for nextHaplotypeBases.
log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases,
readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, null);
likelihoodMap.add(read, currentAllele, log10l);
mLikelihoodArray[idx++] = log10l;
}
}
if(doProfiling)
{
threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
//synchronized(doProfiling)
{
pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff;
}
}
return likelihoodMap;
}
/** /**
* Compute the total probability of read arising from haplotypeBases given base substitution, insertion, and deletion * Compute the total probability of read arising from haplotypeBases given base substitution, insertion, and deletion
* probabilities. * probabilities.