gatk-3.8/archive/java/src/org/broadinstitute/sting/glf/LikelihoodObject.java

291 lines
10 KiB
Java
Executable File

package org.broadinstitute.sting.utils.genotype;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.HashMap;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
* <p/>
* Class LikelihoodObject
* <p/>
* An object used to store likelihood information for genotypes. Genotype
* likelihoods are assumed to be infinite (negative log likelihood), unless set.
* This allows the consumer to make an empty LikelihoodObject, and just set
* those values which have associated likelihood values.
*/
// TODO -- DELETE ME GLF
public class LikelihoodObject {

    /** The ten diploid genotypes, declared in GLFv3 order; array-based methods follow this order. */
    public enum GENOTYPE {
        AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
    }

    /** The scales in which the stored likelihood values may be expressed. */
    public enum LIKELIHOOD_TYPE {
        NEGATIVE_LOG, LOG, RAW;
    }

    /** The scale the values in {@link #likelihoods} are currently expressed in. */
    protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGATIVE_LOG;

    /**
     * The genotype currently holding the lowest stored value; defaults to AA.
     * NOTE(review): "best" is always tracked as the numerically lowest value, which matches
     * NEGATIVE_LOG storage; for LOG or RAW storage the lowest value is the least likely
     * genotype -- confirm what callers expect in that case.
     */
    protected GENOTYPE bestGenotype = GENOTYPE.AA;

    /** How many genotypes are stored (the number of GENOTYPE constants). */
    public static final int genoTypeCount = GENOTYPE.values().length;

    /** The stored likelihood value for each genotype, in the scale given by mLikelihoodType. */
    protected final HashMap<GENOTYPE, Double> likelihoods = new HashMap<GENOTYPE, Double>();

    /**
     * Create a blank likelihood object: every genotype is initialised to Double.MAX_VALUE
     * and the storage type is NEGATIVE_LOG.
     */
    public LikelihoodObject() {
        for (GENOTYPE type : GENOTYPE.values()) {
            likelihoods.put(type, Double.MAX_VALUE);
        }
    }

    /**
     * Create a likelihood object from a picard style GenotypeLikelihoods object. Each value
     * read from the GenotypeLikelihoods object is negated before being stored, so the result
     * is labelled NEGATIVE_LOG, and the genotype with the lowest negated value becomes the
     * best genotype.
     *
     * @param lk the likelihood object to copy values from
     */
    public LikelihoodObject(GenotypeLikelihoods lk) {
        // The values stored below are negated, so NEGATIVE_LOG (not LOG) is the matching label;
        // with this label convertToGenotypeLikelihoods() negates them back on the way out.
        mLikelihoodType = LIKELIHOOD_TYPE.NEGATIVE_LOG;
        double minValue = Double.MAX_VALUE;
        for (GENOTYPE type : GENOTYPE.values()) {
            final String alleles = type.toString();
            final byte[] bases = new byte[] {(byte) alleles.charAt(0), (byte) alleles.charAt(1)};
            final double val = -1.0d * lk.getLikelihood(DiploidGenotype.fromBases(bases));
            likelihoods.put(type, val);
            if (val < minValue) {
                // remember the running minimum; without this the last genotype would always win
                minValue = val;
                bestGenotype = type;
            }
        }
    }

    /**
     * Create a likelihood object from an array of genotype scores in GLFv3 (GENOTYPE enum) order.
     *
     * @param values one score per genotype, expressed in the scale named by {@code type}
     * @param type   the likelihood storage type of the supplied values
     * @throws IllegalArgumentException if the array does not hold exactly one value per genotype
     */
    public LikelihoodObject(double[] values, LIKELIHOOD_TYPE type) {
        mLikelihoodType = type;
        if (values.length != GENOTYPE.values().length) {
            throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
        }
        findBestLikelihood(values);
    }

    /**
     * Store the given values (in GENOTYPE order) and record the genotype with the lowest one.
     * Ties keep the genotype that comes first in GENOTYPE order.
     *
     * @param values one value per genotype, in GENOTYPE order
     */
    private void findBestLikelihood(double[] values) {
        int index = 0;
        double lowestScore = Double.MAX_VALUE;
        for (GENOTYPE t : GENOTYPE.values()) {
            likelihoods.put(t, values[index]);
            if (values[index] < lowestScore) {
                lowestScore = values[index];
                bestGenotype = t;
            }
            ++index;
        }
    }

    /**
     * Re-derive the best genotype by scanning every stored value for the lowest one.
     * Ties keep the genotype that comes first in GENOTYPE order.
     */
    private void recomputeBestGenotype() {
        GENOTYPE lowest = null;
        double lowestScore = 0.0;
        for (GENOTYPE t : GENOTYPE.values()) {
            final Double stored = likelihoods.get(t);
            if (stored == null) {
                continue;
            }
            if (lowest == null || stored < lowestScore) {
                lowest = t;
                lowestScore = stored;
            }
        }
        if (lowest != null) {
            bestGenotype = lowest;
        }
    }

    /**
     * Set the stored value for one genotype and keep the best (lowest-valued) genotype current.
     *
     * @param type the genotype
     * @param lh   the likelihood as a double, in the current storage scale
     */
    public void setLikelihood(GENOTYPE type, double lh) {
        final double previousBest = likelihoods.get(this.bestGenotype);
        likelihoods.put(type, lh);
        if (type == this.bestGenotype) {
            // the current best just got worse, so another genotype may now hold the minimum
            if (lh > previousBest) {
                recomputeBestGenotype();
            }
        } else if (lh < previousBest) {
            this.bestGenotype = type;
        }
    }

    /**
     * Return the value stored for the best genotype, i.e. the lowest stored value. With
     * NEGATIVE_LOG storage this is the most likely genotype's score.
     *
     * @return the value stored for the best genotype
     */
    public double getBestLikelihood() {
        return likelihoods.get(this.bestGenotype);
    }

    /**
     * Return the stored values truncated to integers, in GLFv3 (GENOTYPE enum) order.
     * Values whose integer part exceeds 254 are reported as 255. The return type is short[]
     * rather than byte[] because a signed byte cannot hold the 0-255 range. No lower bound is
     * applied, so negative stored values come back negative.
     *
     * @return an array with one entry per genotype
     */
    public short[] toByteArray() {
        final short[] ret = new short[genoTypeCount];
        int index = 0;
        for (GENOTYPE type : GENOTYPE.values()) {
            final int truncated = likelihoods.get(type).intValue();
            ret[index] = (truncated > 254) ? 255 : (short) truncated;
            ++index;
        }
        return ret;
    }

    /**
     * Copy the stored values into a new array, in the order of the GENOTYPE enum.
     *
     * @return a double array with one entry per genotype, in the current storage scale
     */
    public double[] toDoubleArray() {
        final double[] ft = new double[genoTypeCount];
        int index = 0;
        for (GENOTYPE genotype : GENOTYPE.values()) {
            ft[index] = this.likelihoods.get(genotype).doubleValue();
            index++;
        }
        return ft;
    }

    /**
     * Convert this object to a GenotypeLikelihoods object. The stored values are first brought
     * to the log scale: NEGATIVE_LOG values are negated, RAW values go through Math.log, and
     * LOG values are passed through unchanged. The remaining arguments are handed to the
     * GenotypeLikelihoods constructor as given.
     *
     * @param samHeader   passed through to the GenotypeLikelihoods constructor
     * @param seqIndex    passed through to the GenotypeLikelihoods constructor
     * @param seqPosition passed through to the GenotypeLikelihoods constructor
     * @param refBase     passed through to the GenotypeLikelihoods constructor
     * @return a GenotypeLikelihoods object representing our data
     */
    public GenotypeLikelihoods convertToGenotypeLikelihoods(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) {
        final double[] ft = toDoubleArray();
        final float[] db = new float[ft.length];
        for (int i = 0; i < ft.length; i++) {
            if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGATIVE_LOG) {
                db[i] = ((float) ft[i] * -1.0f);
            } else if (this.mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
                db[i] = (float) Math.log(ft[i]);
            } else {
                db[i] = (float) ft[i];
            }
        }
        return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, db);
    }

    /**
     * Getter for the likelihood type.
     *
     * @return our likelihood storage type
     */
    public LIKELIHOOD_TYPE getLikelihoodType() {
        return mLikelihoodType;
    }

    /**
     * Validate a genotype score against the current storage type: NEGATIVE_LOG scores must be
     * &gt;= 0, LOG scores must be &lt;= 0, and RAW scores must lie in [0,1].
     *
     * @param score the score to validate
     * @throws ReviewedStingException if the score is outside the range allowed for the current type
     */
    public void validateScore(double score) {
        switch (mLikelihoodType) {
            case NEGATIVE_LOG:
                if (score < 0)
                    throw new ReviewedStingException("Likelikhood score of " + score + " is invalid, for NEGATIVE_LOG it must be greater than or equal to 0");
                break;
            case LOG:
                if (score > 0)
                    throw new ReviewedStingException("Likelikhood score of " + score + " is invalid, for LOG it must be less than or equal to 0");
                break;
            case RAW:
                if (score < 0 || score > 1)
                    throw new ReviewedStingException("Likelikhood score of " + score + " is invalid, for RAW it must be [0,1]");
                break;
        }
    }

    /**
     * Set our likelihood storage type, and convert the currently stored values to the new scale.
     * Conversions to and from RAW use the natural logarithm (Math.log / Math.exp), matching
     * the Math.log used by convertToGenotypeLikelihoods, so converting to RAW and back is an
     * identity up to floating point error.
     * NOTE(review): the previous code raised values to the 10th power when converting to RAW,
     * which may have been a mistyped base-10 exponent -- confirm which log base callers expect.
     *
     * @param likelihood the type to set the values to.
     */
    public void setLikelihoodType(LIKELIHOOD_TYPE likelihood) {
        if (likelihood == mLikelihoodType)
            return;
        final boolean fromRaw = (mLikelihoodType == LIKELIHOOD_TYPE.RAW);
        final boolean toRaw = (likelihood == LIKELIHOOD_TYPE.RAW);
        // the non-RAW scale taking part in a RAW conversion decides the sign of the logarithm
        final LIKELIHOOD_TYPE logSide = fromRaw ? likelihood : mLikelihoodType;
        final double sign = (logSide == LIKELIHOOD_TYPE.NEGATIVE_LOG) ? -1.0 : 1.0;
        // put() on an existing key replaces the value without changing the key set being iterated
        for (GENOTYPE g : likelihoods.keySet()) {
            final double current = likelihoods.get(g);
            final double converted;
            if (fromRaw) {
                converted = sign * Math.log(current);
            } else if (toRaw) {
                converted = Math.exp(sign * current);
            } else {
                // one of us in log, the other negative log, it doesn't matter which
                converted = -1.0 * current;
            }
            likelihoods.put(g, converted);
        }
        this.mLikelihoodType = likelihood;
    }
}