Genotype quality is now recalculated for each of the phased Genotypes. Small problem is that we unnecessarily loose a little precision on the genotypes that do not change after assignment.

This commit is contained in:
Laurent Francioli 2011-10-20 17:04:19 +02:00
parent 1c61a57329
commit edea90786a
2 changed files with 31 additions and 7 deletions

View File

@ -266,6 +266,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
genotypeAttributes.putAll(genotype.getAttributes());
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, transmissionProb);
genotype = Genotype.modifyAttributes(genotype, genotypeAttributes);
boolean isPhased = true;
@ -274,7 +275,6 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
//If unphased, return original genotype
for(AlleleType allele : phasedAlleles){
if(allele == AlleleType.NO_CALL){
genotype = Genotype.modifyAttributes(genotype, genotypeAttributes);
return genotype;
}
//Otherwise add the appropriate allele
@ -293,13 +293,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
alleles.add(altAllele);
}
}
//TODO: Recalculate GQ field for the new genotype
//Remove the GQ attribute as it needs to be recalculated for the new genotype assignment
//double[] likelihoods = genotype.getLikelihoods().getAsVector();
//genotypeAttributes.put(VCFConstants.GENOTYPE_QUALITY_KEY,likelihoods[1]);
genotype = Genotype.modifyAttributes(genotype, genotypeAttributes);
return new Genotype(genotype.getSampleName(), alleles, genotype.getNegLog10PError(), null, genotype.getAttributes(), isPhased);
return new Genotype(genotype.getSampleName(), alleles, genotype.getLikelihoods().getLog10GQ(genotype.getType()), null, genotype.getAttributes(), isPhased);
}

View File

@ -25,8 +25,10 @@
package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.jgrapht.util.MathUtil;
import java.util.EnumMap;
import java.util.Map;
@ -98,6 +100,7 @@ public class GenotypeLikelihoods {
return likelihoodsAsString_PLs;
}
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
//Make sure that the log10likelihoods are set
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
@ -108,6 +111,32 @@ public class GenotypeLikelihoods {
return likelihoodsMap;
}
//Return the log10 Genotype Quality (GQ) for the given genotype
public double getLog10GQ(Genotype.Type genotype){
double qual = Double.NEGATIVE_INFINITY;
EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
if(likelihood.getKey() == genotype)
continue;
if(likelihood.getValue() > qual)
qual = likelihood.getValue();
}
qual = likelihoods.get(genotype) - qual;
if (qual < 0) {
// QUAL can be negative if the chosen genotype is not the most likely one individually.
// In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen one
double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
double chosenGenotype = normalized[genotype.ordinal()-1];
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
}
return qual;
}
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
String[] strings = likelihoodsAsString_PLs.split(",");