First step in refactoring UG's way of storing indel likelihoods. The main motivation is that rank-sum annotations require per-read quality or likelihood information, and even the question "which allele of a variant is present in a read?", which is trivial for SNPs, may not be that straightforward for indels.
This step only changes how likelihoods are stored: instead of an internal matrix, we now have a class member which stores, as a hash table, a mapping from each pileup element to its (allele, likelihood) pairs. There is no functional change aside from internal data storage. As a bonus, we get a 2-3x speedup in calling for free, because redundant likelihood computations are removed. The next step will hook this up to the annotation engine and redefine that engine's interaction with UG for the indel case. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5809 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3ccc08ace4
commit
5a7444e186
|
|
@ -34,10 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -57,9 +59,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
private boolean DEBUG = false;
|
private boolean DEBUG = false;
|
||||||
|
|
||||||
private PairHMMIndelErrorModel pairModel;
|
private PairHMMIndelErrorModel pairModel;
|
||||||
// gdebug removeme
|
|
||||||
// todo -cleanup
|
private HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap;
|
||||||
private HaplotypeIndelErrorModel model;
|
private LinkedHashMap<Allele,Haplotype> haplotypeMap;
|
||||||
|
|
||||||
|
// gdebug removeme
|
||||||
|
// todo -cleanup
|
||||||
|
private HaplotypeIndelErrorModel model;
|
||||||
private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
|
private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
|
||||||
//
|
//
|
||||||
private GenomeLoc lastSiteVisited;
|
private GenomeLoc lastSiteVisited;
|
||||||
|
|
@ -84,6 +90,7 @@ private HaplotypeIndelErrorModel model;
|
||||||
model = new HaplotypeIndelErrorModel(3, INSERTION_START_PROBABILITY,
|
model = new HaplotypeIndelErrorModel(3, INSERTION_START_PROBABILITY,
|
||||||
INSERTION_END_PROBABILITY,ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO);
|
INSERTION_END_PROBABILITY,ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO);
|
||||||
}
|
}
|
||||||
|
|
||||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||||
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
|
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
|
||||||
alleleList = new ArrayList<Allele>();
|
alleleList = new ArrayList<Allele>();
|
||||||
|
|
@ -91,6 +98,10 @@ private HaplotypeIndelErrorModel model;
|
||||||
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
||||||
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
|
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
|
||||||
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
|
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
|
||||||
|
|
||||||
|
indelLikelihoodMap = new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
|
||||||
|
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -294,7 +305,8 @@ private HaplotypeIndelErrorModel model;
|
||||||
// starting a new site: clear allele list
|
// starting a new site: clear allele list
|
||||||
alleleList.clear();
|
alleleList.clear();
|
||||||
lastSiteVisited = ref.getLocus().clone();
|
lastSiteVisited = ref.getLocus().clone();
|
||||||
|
indelLikelihoodMap.clear();
|
||||||
|
haplotypeMap.clear();
|
||||||
|
|
||||||
if (getAlleleListFromVCF) {
|
if (getAlleleListFromVCF) {
|
||||||
|
|
||||||
|
|
@ -341,7 +353,7 @@ private HaplotypeIndelErrorModel model;
|
||||||
int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
||||||
// assume only one alt allele for now
|
// assume only one alt allele for now
|
||||||
|
|
||||||
List<Haplotype> haplotypesInVC;
|
//List<Haplotype> haplotypesInVC;
|
||||||
|
|
||||||
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
|
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
|
||||||
int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
|
int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
|
||||||
|
|
@ -354,7 +366,7 @@ private HaplotypeIndelErrorModel model;
|
||||||
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
|
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
|
||||||
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
|
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
|
||||||
|
|
||||||
haplotypesInVC = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
|
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
|
||||||
ref, hsize, numPrefBases);
|
ref, hsize, numPrefBases);
|
||||||
|
|
||||||
// For each sample, get genotype likelihoods based on pileup
|
// For each sample, get genotype likelihoods based on pileup
|
||||||
|
|
@ -362,9 +374,6 @@ private HaplotypeIndelErrorModel model;
|
||||||
// initialize the GenotypeLikelihoods
|
// initialize the GenotypeLikelihoods
|
||||||
GLs.clear();
|
GLs.clear();
|
||||||
|
|
||||||
double[][] haplotypeLikehoodMatrix;
|
|
||||||
|
|
||||||
|
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||||
|
|
||||||
|
|
@ -375,15 +384,14 @@ private HaplotypeIndelErrorModel model;
|
||||||
pileup = context.getBasePileup();
|
pileup = context.getBasePileup();
|
||||||
|
|
||||||
if (pileup != null ) {
|
if (pileup != null ) {
|
||||||
|
double[] genotypeLikelihoods;
|
||||||
if (useOldWrongHorribleHackedUpLikelihoodModel)
|
if (useOldWrongHorribleHackedUpLikelihoodModel)
|
||||||
haplotypeLikehoodMatrix = model.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC);
|
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
|
||||||
else
|
else
|
||||||
haplotypeLikehoodMatrix = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC, ref, HAPLOTYPE_SIZE, eventLength);
|
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, HAPLOTYPE_SIZE, eventLength, indelLikelihoodMap);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double[] genotypeLikelihoods = HaplotypeIndelErrorModel.getHaplotypeLikelihoods( haplotypeLikehoodMatrix);
|
|
||||||
|
|
||||||
GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),
|
GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),
|
||||||
refAllele,
|
refAllele,
|
||||||
altAllele,
|
altAllele,
|
||||||
|
|
@ -398,4 +406,7 @@ private HaplotypeIndelErrorModel model;
|
||||||
|
|
||||||
return refAllele;
|
return refAllele;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import net.sf.samtools.AlignmentBlock;
|
import net.sf.samtools.AlignmentBlock;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -37,6 +38,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class HaplotypeIndelErrorModel {
|
public class HaplotypeIndelErrorModel {
|
||||||
|
|
@ -419,7 +421,7 @@ public class HaplotypeIndelErrorModel {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[][] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, List<Haplotype> haplotypesInVC){
|
public double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, HashMap<Allele,Haplotype> haplotypesInVC){
|
||||||
double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
|
double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
|
||||||
double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
|
double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
|
||||||
int i=0;
|
int i=0;
|
||||||
|
|
@ -429,7 +431,8 @@ public class HaplotypeIndelErrorModel {
|
||||||
}
|
}
|
||||||
// for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
|
// for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
|
||||||
// = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
|
// = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
|
||||||
for (int j=0; j < haplotypesInVC.size(); j++) {
|
int j=0;
|
||||||
|
for (Allele a: haplotypesInVC.keySet()) {
|
||||||
readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(haplotypesInVC.get(j), read);
|
readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(haplotypesInVC.get(j), read);
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.print(read.getReadName()+" ");
|
System.out.print(read.getReadName()+" ");
|
||||||
|
|
@ -438,7 +441,7 @@ public class HaplotypeIndelErrorModel {
|
||||||
read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(),
|
read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(),
|
||||||
read.getCigarString(), readLikelihoods[i][j]);
|
read.getCigarString(), readLikelihoods[i][j]);
|
||||||
}
|
}
|
||||||
|
j++;
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
@ -465,11 +468,11 @@ public class HaplotypeIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return haplotypeLikehoodMatrix;
|
return getHaplotypeLikelihoods(haplotypeLikehoodMatrix);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double[] getHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) {
|
private double[] getHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) {
|
||||||
int hSize = haplotypeLikehoodMatrix.length;
|
int hSize = haplotypeLikehoodMatrix.length;
|
||||||
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
|
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate;
|
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate;
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager;
|
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager;
|
||||||
|
|
@ -41,6 +42,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
@ -48,9 +50,7 @@ import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -697,29 +697,31 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fillGapProbabilities(int hIndex, int[] hrunProfile,
|
private void fillGapProbabilities(int[] hrunProfile,
|
||||||
double[][] contextLogGapOpenProbabilities, double[][] contextLogGapContinuationProbabilities) {
|
double[] contextLogGapOpenProbabilities, double[] contextLogGapContinuationProbabilities) {
|
||||||
// fill based on lookup table
|
// fill based on lookup table
|
||||||
for (int i = 0; i < hrunProfile.length; i++) {
|
for (int i = 0; i < hrunProfile.length; i++) {
|
||||||
if (hrunProfile[i] >= MAX_HRUN_GAP_IDX) {
|
if (hrunProfile[i] >= MAX_HRUN_GAP_IDX) {
|
||||||
contextLogGapOpenProbabilities[hIndex][i] = GAP_OPEN_PROB_TABLE[MAX_HRUN_GAP_IDX-1];
|
contextLogGapOpenProbabilities[i] = GAP_OPEN_PROB_TABLE[MAX_HRUN_GAP_IDX-1];
|
||||||
contextLogGapContinuationProbabilities[hIndex][i] = GAP_CONT_PROB_TABLE[MAX_HRUN_GAP_IDX-1];
|
contextLogGapContinuationProbabilities[i] = GAP_CONT_PROB_TABLE[MAX_HRUN_GAP_IDX-1];
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
contextLogGapOpenProbabilities[hIndex][i] = GAP_OPEN_PROB_TABLE[hrunProfile[i]];
|
contextLogGapOpenProbabilities[i] = GAP_OPEN_PROB_TABLE[hrunProfile[i]];
|
||||||
contextLogGapContinuationProbabilities[hIndex][i] = GAP_CONT_PROB_TABLE[hrunProfile[i]];
|
contextLogGapContinuationProbabilities[i] = GAP_CONT_PROB_TABLE[hrunProfile[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public synchronized double[][] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, List<Haplotype> haplotypesInVC,
|
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
|
||||||
ReferenceContext ref, int haplotypeSize, int eventLength){
|
ReferenceContext ref, int haplotypeSize, int eventLength,
|
||||||
double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
|
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
|
||||||
double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
|
|
||||||
|
int numHaplotypes = haplotypeMap.size();
|
||||||
|
double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
|
||||||
|
double readLikelihoods[][] = new double[pileup.getReads().size()][numHaplotypes];
|
||||||
int readIdx=0;
|
int readIdx=0;
|
||||||
|
|
||||||
double[][] contextLogGapOpenProbabilities = null;
|
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
|
||||||
double[][] contextLogGapContinuationProbabilities = null;
|
LinkedHashMap<Allele,double[]> gapContProbabilityMap = new LinkedHashMap<Allele,double[]>();
|
||||||
|
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println("Reference bases:");
|
System.out.println("Reference bases:");
|
||||||
|
|
@ -727,15 +729,15 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doContextDependentPenalties && !getGapPenaltiesFromFile) {
|
if (doContextDependentPenalties && !getGapPenaltiesFromFile) {
|
||||||
// will context dependent probabilities based on homopolymet run. Probabilities are filled based on total complete haplotypes.
|
// will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
|
||||||
|
|
||||||
for (int j=0; j < haplotypesInVC.size(); j++) {
|
|
||||||
Haplotype haplotype = haplotypesInVC.get(j);
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
|
Haplotype haplotype = haplotypeMap.get(a);
|
||||||
byte[] haplotypeBases = haplotype.getBasesAsBytes();
|
byte[] haplotypeBases = haplotype.getBasesAsBytes();
|
||||||
if (contextLogGapOpenProbabilities == null) {
|
double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
|
||||||
contextLogGapOpenProbabilities = new double[haplotypesInVC.size()][haplotypeBases.length];
|
double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
|
||||||
contextLogGapContinuationProbabilities = new double[haplotypesInVC.size()][haplotypeBases.length];
|
|
||||||
}
|
|
||||||
// get homopolymer length profile for current haplotype
|
// get homopolymer length profile for current haplotype
|
||||||
int[] hrunProfile = new int[haplotypeBases.length];
|
int[] hrunProfile = new int[haplotypeBases.length];
|
||||||
getContextHomopolymerLength(haplotypeBases,hrunProfile);
|
getContextHomopolymerLength(haplotypeBases,hrunProfile);
|
||||||
|
|
@ -746,12 +748,25 @@ public class PairHMMIndelErrorModel {
|
||||||
System.out.format("%d",hrunProfile[i]);
|
System.out.format("%d",hrunProfile[i]);
|
||||||
System.out.println();
|
System.out.println();
|
||||||
}
|
}
|
||||||
fillGapProbabilities(j, hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
||||||
|
|
||||||
|
gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
|
||||||
|
gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (SAMRecord pread : pileup.getReads()) {
|
for (PileupElement p: pileup) {
|
||||||
GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(pread);
|
|
||||||
|
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
||||||
|
if (indelLikelihoodMap.containsKey(p)) {
|
||||||
|
HashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||||
|
int j=0;
|
||||||
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
|
readLikelihoods[readIdx][j++] = el.get(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
|
||||||
if (read == null)
|
if (read == null)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
@ -760,6 +775,8 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
double[] recalQuals = null;
|
double[] recalQuals = null;
|
||||||
|
|
||||||
|
|
||||||
if (getGapPenaltiesFromFile) {
|
if (getGapPenaltiesFromFile) {
|
||||||
RecalDataManager.parseSAMRecord( read, RAC );
|
RecalDataManager.parseSAMRecord( read, RAC );
|
||||||
|
|
||||||
|
|
@ -823,7 +840,7 @@ public class PairHMMIndelErrorModel {
|
||||||
numEndSoftClippedBases = read.getUnclippedEnd()- read.getAlignmentEnd();
|
numEndSoftClippedBases = read.getUnclippedEnd()- read.getAlignmentEnd();
|
||||||
|
|
||||||
// check for hard clips (never consider these bases):
|
// check for hard clips (never consider these bases):
|
||||||
/* Cigar c = read.getCigar();
|
/* Cigar c = read.getCigar();
|
||||||
CigarElement first = c.getCigarElement(0);
|
CigarElement first = c.getCigarElement(0);
|
||||||
CigarElement last = c.getCigarElement(c.numCigarElements()-1);
|
CigarElement last = c.getCigarElement(c.numCigarElements()-1);
|
||||||
int numStartHardClippedBases = 0, numEndHardClippedBases = 0;
|
int numStartHardClippedBases = 0, numEndHardClippedBases = 0;
|
||||||
|
|
@ -903,13 +920,18 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
|
||||||
|
|
||||||
if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
|
if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
|
||||||
if (DEBUG)
|
if (DEBUG)
|
||||||
System.out.println("BAD READ!!");
|
System.out.println("BAD READ!!");
|
||||||
|
|
||||||
for (int j=0; j < haplotypesInVC.size(); j++) {
|
int j=0;
|
||||||
readLikelihoods[readIdx][j]= 0;
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
|
readEl.put(a,0.0);
|
||||||
|
readLikelihoods[readIdx][j++] = 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases,
|
byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases,
|
||||||
|
|
@ -930,12 +952,11 @@ public class PairHMMIndelErrorModel {
|
||||||
System.out.println(new String(readBases));
|
System.out.println(new String(readBases));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int j=0;
|
||||||
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
|
|
||||||
// start and stop have indices into
|
|
||||||
|
|
||||||
for (int j=0; j < haplotypesInVC.size(); j++) {
|
|
||||||
Haplotype haplotype = haplotypesInVC.get(j);
|
|
||||||
|
|
||||||
|
Haplotype haplotype = haplotypeMap.get(a);
|
||||||
if (stop > haplotype.getStopPosition())
|
if (stop > haplotype.getStopPosition())
|
||||||
stop = haplotype.getStopPosition();
|
stop = haplotype.getStopPosition();
|
||||||
|
|
||||||
|
|
@ -954,6 +975,7 @@ public class PairHMMIndelErrorModel {
|
||||||
System.out.println(new String(haplotypeBases));
|
System.out.println(new String(haplotypeBases));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Double readLikelihood = 0.0;
|
||||||
if (useAffineGapModel) {
|
if (useAffineGapModel) {
|
||||||
|
|
||||||
double[] currentContextGOP = null;
|
double[] currentContextGOP = null;
|
||||||
|
|
@ -962,23 +984,25 @@ public class PairHMMIndelErrorModel {
|
||||||
if (doContextDependentPenalties) {
|
if (doContextDependentPenalties) {
|
||||||
|
|
||||||
if (getGapPenaltiesFromFile) {
|
if (getGapPenaltiesFromFile) {
|
||||||
readLikelihoods[readIdx][j]= computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, recalCDP, null);
|
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, recalCDP, null);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
currentContextGOP = Arrays.copyOfRange(contextLogGapOpenProbabilities[j], (int)indStart, (int)indStop);
|
currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
|
||||||
currentContextGCP = Arrays.copyOfRange(contextLogGapContinuationProbabilities[j], (int)indStart, (int)indStop);
|
currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
|
||||||
readLikelihoods[readIdx][j]= computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
|
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
readLikelihoods[readIdx][j]= computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals);
|
readLikelihood = computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals);
|
||||||
|
|
||||||
|
|
||||||
|
readEl.put(a,readLikelihood);
|
||||||
|
readLikelihoods[readIdx][j++] = readLikelihood;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
indelLikelihoodMap.put(p,readEl);
|
||||||
|
}
|
||||||
readIdx++;
|
readIdx++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -992,8 +1016,8 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
for (int i=0; i < haplotypesInVC.size(); i++) {
|
for (int i=0; i < numHaplotypes; i++) {
|
||||||
for (int j=i; j < haplotypesInVC.size(); j++){
|
for (int j=i; j < numHaplotypes; j++){
|
||||||
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
||||||
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
|
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
|
||||||
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
|
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
|
||||||
|
|
@ -1002,7 +1026,7 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
||||||
// First term is approximated by Jacobian log with table lookup.
|
// First term is approximated by Jacobian log with table lookup.
|
||||||
if (Double.isInfinite(readLikelihoods[readIdx][i]) || Double.isInfinite(readLikelihoods[readIdx][j]))
|
if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j]))
|
||||||
continue;
|
continue;
|
||||||
haplotypeLikehoodMatrix[i][j] += ( MathUtils.softMax(readLikelihoods[readIdx][i],
|
haplotypeLikehoodMatrix[i][j] += ( MathUtils.softMax(readLikelihoods[readIdx][i],
|
||||||
readLikelihoods[readIdx][j]) + LOG_ONE_HALF);
|
readLikelihoods[readIdx][j]) + LOG_ONE_HALF);
|
||||||
|
|
@ -1013,7 +1037,7 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return haplotypeLikehoodMatrix;
|
return getHaplotypeLikelihoods(haplotypeLikehoodMatrix);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,9 +31,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class Haplotype {
|
public class Haplotype {
|
||||||
protected byte[] bases = null;
|
protected byte[] bases = null;
|
||||||
|
|
@ -108,11 +106,11 @@ public class Haplotype {
|
||||||
return isReference;
|
return isReference;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
||||||
final int haplotypeSize, final int numPrefBases) {
|
final int haplotypeSize, final int numPrefBases) {
|
||||||
|
|
||||||
|
|
||||||
List<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||||
|
|
||||||
Allele refAllele = null;
|
Allele refAllele = null;
|
||||||
|
|
||||||
|
|
@ -153,11 +151,11 @@ public class Haplotype {
|
||||||
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
|
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
|
||||||
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
||||||
|
|
||||||
haplotypeList.add(new Haplotype(haplotypeString.getBytes(), locus, a.isReference()));
|
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus, a.isReference()));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return haplotypeList;
|
return haplotypeMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue