- Genotype assignment in case of equally likeli combination is now random
- Genotype combinations with 0 confidence are now left unphased
This commit is contained in:
parent
81b163ff4d
commit
1f044faedd
|
|
@ -109,6 +109,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
private final Byte NUM_HET_HET_HET = 4;
|
private final Byte NUM_HET_HET_HET = 4;
|
||||||
private final Byte NUM_VIOLATIONS = 5;
|
private final Byte NUM_VIOLATIONS = 5;
|
||||||
|
|
||||||
|
//Random number generator
|
||||||
|
private Random rand = new Random();
|
||||||
|
|
||||||
private enum FamilyMember {
|
private enum FamilyMember {
|
||||||
MOTHER,
|
MOTHER,
|
||||||
FATHER,
|
FATHER,
|
||||||
|
|
@ -309,17 +312,22 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
|
|
||||||
private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){
|
private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){
|
||||||
|
|
||||||
|
int phredScoreTransmission = -1;
|
||||||
|
if(transmissionProb != NO_TRANSMISSION_PROB)
|
||||||
|
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
|
||||||
|
|
||||||
//Handle null, missing and unavailable genotypes
|
//Handle null, missing and unavailable genotypes
|
||||||
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
|
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
|
||||||
//genotype so it is safe to return the original genotype in this case.
|
//genotype so it is safe to return the original genotype in this case.
|
||||||
if(genotype == null || !phasedGenotype.isAvailable() || phasedGenotype.isNoCall())
|
//In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
|
||||||
|
if(phredScoreTransmission ==0 || genotype == null || !phasedGenotype.isAvailable() || phasedGenotype.isNoCall())
|
||||||
return genotype;
|
return genotype;
|
||||||
|
|
||||||
//Add the transmission probability
|
//Add the transmission probability
|
||||||
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||||
genotypeAttributes.putAll(genotype.getAttributes());
|
genotypeAttributes.putAll(genotype.getAttributes());
|
||||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, MathUtils.probabilityToPhredScale(1-(transmissionProb)));
|
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
||||||
|
|
||||||
ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2);
|
ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2);
|
||||||
for(Allele allele : phasedGenotype.getAlleles()){
|
for(Allele allele : phasedGenotype.getAlleles()){
|
||||||
|
|
@ -538,36 +546,46 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
//Prior vars
|
//Prior vars
|
||||||
double bestConfigurationLikelihood = 0.0;
|
double bestConfigurationLikelihood = 0.0;
|
||||||
double norm = 0.0;
|
double norm = 0.0;
|
||||||
boolean isMV = false;
|
int configuration_index =0;
|
||||||
int bestConfigurationGenotypeDiffs=4;
|
ArrayList<Boolean> isMV = new ArrayList<Boolean>();
|
||||||
Genotype.Type bestMotherGenotype = getTypeSafeNull(mother);
|
isMV.add(false);
|
||||||
Genotype.Type bestFatherGenotype = getTypeSafeNull(father);
|
ArrayList<Genotype.Type> bestMotherGenotype = new ArrayList<Genotype.Type>();
|
||||||
Genotype.Type bestChildGenotype = getTypeSafeNull(child);
|
bestMotherGenotype.add(getTypeSafeNull(mother));
|
||||||
|
ArrayList<Genotype.Type> bestFatherGenotype = new ArrayList<Genotype.Type>();
|
||||||
|
bestFatherGenotype.add(getTypeSafeNull(father));
|
||||||
|
ArrayList<Genotype.Type> bestChildGenotype = new ArrayList<Genotype.Type>();
|
||||||
|
bestChildGenotype.add(getTypeSafeNull(child));
|
||||||
|
|
||||||
//Get the most likely combination
|
//Get the most likely combination
|
||||||
//Only check for most likely combination if at least a parent and the child have genotypes
|
//Only check for most likely combination if at least a parent and the child have genotypes
|
||||||
if(childLikelihoods.size()>2 && (motherLikelihoods.size() + fatherLikelihoods.size())>3){
|
if(childLikelihoods.size()>2 && (motherLikelihoods.size() + fatherLikelihoods.size())>3){
|
||||||
int mvCount;
|
int mvCount;
|
||||||
double configurationLikelihood;
|
double configurationLikelihood;
|
||||||
int configurationGenotypeDiffs;
|
|
||||||
for(Map.Entry<Genotype.Type,Double> motherGenotype : motherLikelihoods.entrySet()){
|
for(Map.Entry<Genotype.Type,Double> motherGenotype : motherLikelihoods.entrySet()){
|
||||||
for(Map.Entry<Genotype.Type,Double> fatherGenotype : fatherLikelihoods.entrySet()){
|
for(Map.Entry<Genotype.Type,Double> fatherGenotype : fatherLikelihoods.entrySet()){
|
||||||
for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){
|
for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){
|
||||||
mvCount = mvCountMatrix.get(motherGenotype.getKey()).get(fatherGenotype.getKey()).get(childGenotype.getKey());
|
mvCount = mvCountMatrix.get(motherGenotype.getKey()).get(fatherGenotype.getKey()).get(childGenotype.getKey());
|
||||||
configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*motherGenotype.getValue()*fatherGenotype.getValue()*childGenotype.getValue() : (1.0-11*deNovoPrior)*motherGenotype.getValue()*fatherGenotype.getValue()*childGenotype.getValue();
|
configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*motherGenotype.getValue()*fatherGenotype.getValue()*childGenotype.getValue() : (1.0-11*deNovoPrior)*motherGenotype.getValue()*fatherGenotype.getValue()*childGenotype.getValue();
|
||||||
norm += configurationLikelihood;
|
norm += configurationLikelihood;
|
||||||
configurationGenotypeDiffs = countFamilyGenotypeDiff(mother.getType(),father.getType(),child.getType(),motherGenotype.getKey(),fatherGenotype.getKey(),childGenotype.getKey());
|
|
||||||
//Keep this combination if
|
//Keep this combination if
|
||||||
//It has a better likelihood
|
//It has a better likelihood
|
||||||
//Or it has the same likelihood but requires less changes from original genotypes
|
//Or it has the same likelihood but requires less changes from original genotypes
|
||||||
if ((configurationLikelihood > bestConfigurationLikelihood) ||
|
if (configurationLikelihood > bestConfigurationLikelihood){
|
||||||
(configurationLikelihood == bestConfigurationLikelihood && configurationGenotypeDiffs < bestConfigurationGenotypeDiffs)) {
|
|
||||||
bestConfigurationLikelihood = configurationLikelihood;
|
bestConfigurationLikelihood = configurationLikelihood;
|
||||||
bestMotherGenotype = motherGenotype.getKey();
|
isMV.clear();
|
||||||
bestFatherGenotype = fatherGenotype.getKey();
|
isMV.add(mvCount>0);
|
||||||
bestChildGenotype = childGenotype.getKey();
|
bestMotherGenotype.clear();
|
||||||
isMV = mvCount>0;
|
bestMotherGenotype.add(motherGenotype.getKey());
|
||||||
bestConfigurationGenotypeDiffs=configurationGenotypeDiffs;
|
bestFatherGenotype.clear();
|
||||||
|
bestFatherGenotype.add(fatherGenotype.getKey());
|
||||||
|
bestChildGenotype.clear();
|
||||||
|
bestChildGenotype.add(childGenotype.getKey());
|
||||||
|
}
|
||||||
|
else if(configurationLikelihood == bestConfigurationLikelihood) {
|
||||||
|
bestMotherGenotype.add(motherGenotype.getKey());
|
||||||
|
bestFatherGenotype.add(fatherGenotype.getKey());
|
||||||
|
bestChildGenotype.add(childGenotype.getKey());
|
||||||
|
isMV.add(mvCount>0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -575,17 +593,22 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
|
|
||||||
//normalize the best configuration probability
|
//normalize the best configuration probability
|
||||||
bestConfigurationLikelihood = bestConfigurationLikelihood / norm;
|
bestConfigurationLikelihood = bestConfigurationLikelihood / norm;
|
||||||
|
|
||||||
|
//In case of multiple equally likely combinations, take a random one
|
||||||
|
if(bestMotherGenotype.size()>1){
|
||||||
|
configuration_index = rand.nextInt(bestMotherGenotype.size()-1);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
bestConfigurationLikelihood = NO_TRANSMISSION_PROB;
|
bestConfigurationLikelihood = NO_TRANSMISSION_PROB;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get the phased alleles for the genotype configuration
|
TrioPhase phasedTrioGenotypes = transmissionMatrix.get(bestMotherGenotype.get(configuration_index)).get(bestFatherGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
|
||||||
TrioPhase phasedTrioGenotypes = transmissionMatrix.get(bestMotherGenotype).get(bestFatherGenotype).get(bestChildGenotype);
|
|
||||||
|
|
||||||
//Return the phased genotypes
|
//Return the phased genotypes
|
||||||
phasedTrioGenotypes.getPhasedGenotypes(ref,alt,mother,father,child,bestConfigurationLikelihood,finalGenotypes);
|
phasedTrioGenotypes.getPhasedGenotypes(ref,alt,mother,father,child,bestConfigurationLikelihood,finalGenotypes);
|
||||||
return isMV;
|
return isMV.get(configuration_index);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue