Retiring multi-sample genotyper

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3402 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-05-20 14:11:11 +00:00
parent 1ab00e5895
commit a8f4ccf2f9
5 changed files with 1965 additions and 0 deletions

View File

@ -0,0 +1,186 @@
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
import org.broadinstitute.sting.utils.*;
import net.sf.samtools.*;
import java.util.List;
import java.util.ArrayList;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Apr 14, 2009
* Time: 8:54:05 AM
* To change this template use File | Settings | File Templates.
*/
abstract public class BasicPileup {
public static final char DELETION_CHAR = 'D';
@Deprecated
abstract GenomeLoc getLocation();
@Deprecated
abstract char getRef();
@Deprecated
abstract int size();
/**
* This is the right way to get bases
*
* @return
*/
@Deprecated
byte[] getBases() { return null; }
/**
* This is the right way to get quals
*
* @return
*/
@Deprecated
byte[] getQuals() { return null; }
/**
* This is a terrible way to get bases. Use getBases() or getBasesAsArrayList()
*
* @return
*/
@Deprecated
String getBasesAsString() { return null; }
/**
* This is a terrible way to get quals. Use getQuals() or getQualsAsArrayList()
*
* @return
*/
@Deprecated
String getQualsAsString() { return null; }
public String getPileupString() {
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
}
//
// ArrayList<Byte> methods
//
public static byte[] getBasesAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
byte array[] = new byte[reads.size()];
int index = 0;
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
if ( offset == -1 ) {
array[index++] = ((byte)DELETION_CHAR);
} else {
array[index++] = read.getReadBases()[offset];
}
}
return array;
}
@Deprecated
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
for (byte value : getBasesAsArray(reads, offsets))
bases.add(value);
return bases;
}
@Deprecated
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for (byte value : getQualsAsArray(reads, offsets))
quals.add(value);
return quals;
}
@Deprecated
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
byte array[] = new byte[reads.size()];
int index = 0;
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
// skip deletion sites
if ( offset == -1 ) {
array[index++] = ((byte)0);
} else {
array[index++] = read.getBaseQualities()[offset];
}
}
return array;
}
@Deprecated
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
byte qual = (byte)read.getMappingQuality();
quals.add(qual);
}
return quals;
}
@Deprecated // todo -- delete me
public static String[] indelPileup( List<SAMRecord> reads, List<Integer> offsets )
{
String[] indels = new String[reads.size()];
for (int i = 0; i < reads.size(); i++)
{
SAMRecord read = reads.get(i);
Cigar cigar = read.getCigar();
int offset = offsets.get(i);
String cigar_string = read.getCigarString();
if (! (cigar_string.contains("I") || cigar_string.contains("D"))) { indels[i] = "null"; continue; }
//System.out.printf("%s:%d %s %s %s ", read.getReferenceName(), read.getAlignmentStart(), read.getReadName(), read.getReadString(), cigar_string);
int k = 0;
for (int j = 0; j < cigar.numCigarElements(); j++)
{
CigarOperator operator = cigar.getCigarElement(j).getOperator();
int length = cigar.getCigarElement(j).getLength();
if (operator == CigarOperator.M)
{
k += length;
}
else if ((k == offset+1) && (operator == CigarOperator.I))
{
// this insertion is associated with this offset (kinda ;) ).
indels[i] = read.getReadString().substring(k, k+length);
//System.out.printf("(I,%d,%d)", k, offset);
break;
}
else if ((k != offset+1) && (operator == CigarOperator.I))
{
//System.out.printf("(i,%d,%d)", k, offset);
k += length;
}
else if ((k == offset) && (operator == CigarOperator.D))
{
// this deletion is associated with this offset.
indels[i] = length + "D";
//System.out.printf("(D,%d,%d)", k, offset);
break;
}
else if (k >= offset)
{
// no indel here.
indels[i] = "null";
//System.out.printf("(N,%d,%d)", k, offset);
break;
}
}
if (indels[i] == null) { indels[i] = "null"; }
//System.out.printf("\n");
}
return indels;
}
}

View File

@ -0,0 +1,508 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
import org.broadinstitute.sting.utils.MathUtils;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.lang.Cloneable;
public class ClassicGenotypeLikelihoods implements Cloneable {
// precalculate these for performance (pow/log10 is expensive!)
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
static {
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0)));
//oneMinusData[qual] = log10(1.0 - QualityUtils.qualToProb(qual));
}
}
private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual];
}
private static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
static {
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
oneHalfMinusData[qual] = log10(0.5 - pow(10, (qual / -10.0)) / 2.0);
//oneHalfMinusData[qual] = log10(0.5 - QualityUtils.qualToProb(qual) / 2.0);
}
}
private static double getOneHalfMinusQual(final byte qual) {
return oneHalfMinusData[qual];
}
public double[] likelihoods;
public String[] genotypes;
public int coverage;
// The genotype priors;
private double priorHomRef;
private double priorHet;
private double priorHomVar;
public String[] sorted_genotypes;
public double[] sorted_likelihoods;
double ref_likelihood = Double.NaN;
private IndelLikelihood indel_likelihood;
// Store the 2nd-best base priors for on-genotype primary bases
//private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
// Store the 2nd-best base priors for off-genotype primary bases
//private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
private static double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
private static double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
public ClassicGenotypeLikelihoods() {
initialize(1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
}
public ClassicGenotypeLikelihoods(boolean foo) {
}
public ClassicGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
}
public ClassicGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
}
public ClassicGenotypeLikelihoods clone() {
ClassicGenotypeLikelihoods c = new ClassicGenotypeLikelihoods(false);
c.likelihoods = this.likelihoods.clone();
c.genotypes = this.genotypes.clone();
c.coverage = this.coverage;
// The genotype priors;
c.priorHomRef = this.priorHomRef;
c.priorHet = this.priorHet;
c.priorHomVar = this.priorHomVar;
//public String[] sorted_genotypes;
//public double[] sorted_likelihoods;
//double ref_likelihood = Double.NaN;
//private IndelLikelihood indel_likelihood;
return c;
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
this.priorHomRef = priorHomRef;
this.priorHet = priorHet;
this.priorHomVar = priorHomVar;
likelihoods = new double[10];
genotypes = new String[10];
coverage = 0;
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
genotypes[0] = "AA";
genotypes[1] = "AC";
genotypes[2] = "AG";
genotypes[3] = "AT";
genotypes[4] = "CC";
genotypes[5] = "CG";
genotypes[6] = "CT";
genotypes[7] = "GG";
genotypes[8] = "GT";
genotypes[9] = "TT";
//for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
// onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
// offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
//}
}
public double getHomRefPrior() {
return priorHomRef;
}
public void setHomRefPrior(double priorHomRef) {
this.priorHomRef = priorHomRef;
}
public double getHetPrior() {
return priorHet;
}
public void setHetPrior(double priorHet) {
this.priorHet = priorHet;
}
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
// public double[] getOnGenotypeSecondaryPriors() {
// double[] p2ndon = new double[10];
//
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
// p2ndon[genotypeIndex] = onNextBestBasePriors.get(genotypes[genotypeIndex]);
// }
//
// return p2ndon;
// }
//
// public void setOnGenotypeSecondaryPriors(double[] p2ndon) {
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
// onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
// }
// }
//
// public double[] getOffGenotypeSecondaryPriors() {
// double[] p2ndoff = new double[10];
//
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
// p2ndoff[genotypeIndex] = offNextBestBasePriors.get(genotypes[genotypeIndex]);
// }
//
// return p2ndoff;
// }
//
// public void setOffGenotypeSecondaryPriors(double[] p2ndoff) {
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
// offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
// }
// }
public void add(char ref, char read, byte qual)
{
if (qual <= 0) { qual = 1; }
if (coverage == 0)
{
for (int i = 0; i < likelihoods.length; i++)
{
likelihoods[i] = 0;
}
}
double sum = 0.0;
for (int i = 0; i < genotypes.length; i++)
{
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
likelihoods[i] += likelihood;
}
coverage += 1;
}
public void add(char ref, char read, byte qual, ConfusionMatrix matrix, String platform)
{
if (qual <= 0) { qual = 1; }
if (platform == null) { platform = "ILLUMINA"; }
if (read == 'N') { return; }
if (coverage == 0)
{
for (int i = 0; i < likelihoods.length; i++)
{
likelihoods[i] = 0;
}
}
double sum = 0.0;
for (int i = 0; i < genotypes.length; i++)
{
char h1 = genotypes[i].charAt(0);
char h2 = genotypes[i].charAt(1);
double p1 = matrix.lookup(platform, read, h1);
double p2 = matrix.lookup(platform, read, h2);
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual, p1, p2);
//System.out.printf("DBG: %c %c %s %d %f %f %f\n", ref, read, genotypes[i], qual, p1, p2, likelihood);
likelihoods[i] += likelihood;
}
coverage += 1;
}
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
if (qual == 0) {
qual = 1;
} // zero quals are wrong.
char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1);
double p_base;
if ((h1 == h2) && (h1 == read)) {
// hom
p_base = getOneMinusQual(qual);
} else if ((h1 != h2) && ((h1 == read) || (h2 == read))) {
// het
p_base = getOneHalfMinusQual(qual);
} else {
// error
p_base = qual / -10.0;
}
return p_base;
}
public void TEST()
{
double p_A2A = 1.00;
double p_T2T = 1.00;
double p_A2T = 0.75;
double p_T2A = 0.25;
char ref = 'A';
System.out.printf("\tA\tT\n");
System.out.printf("A\t%.02f\t%.02f\n", p_A2A, p_A2T);
System.out.printf("T\t%.02f\t%.02f\n", p_T2A, p_T2T);
System.out.printf("\n");
System.out.printf("P(A,Q20|AA) = %f\n", calculateAlleleLikelihood(ref, 'A', "AA", (byte)20, p_A2A, p_A2A));
System.out.printf("P(A,Q20|AT) = %f\n", calculateAlleleLikelihood(ref, 'A', "AT", (byte)20, p_A2A, p_A2T));
System.out.printf("P(A,Q20|TT) = %f\n", calculateAlleleLikelihood(ref, 'A', "TT", (byte)20, p_A2T, p_A2T));
System.out.printf("P(T,Q20|AA) = %f\n", calculateAlleleLikelihood(ref, 'T', "AA", (byte)20, p_T2A, p_T2A));
System.out.printf("P(T,Q20|AT) = %f\n", calculateAlleleLikelihood(ref, 'T', "AT", (byte)20, p_T2A, p_T2T));
System.out.printf("P(T,Q20|TT) = %f\n", calculateAlleleLikelihood(ref, 'T', "TT", (byte)20, p_T2T, p_T2T));
//System.exit(0);
}
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual, double p1, double p2) {
if (qual == 0) {
qual = 1;
} // zero quals are wrong.
char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1);
double perr = Math.pow(10.0,qual/-10.0);
double p_base = 0;
if (read == h1)
{
p_base += (1.0 - perr);
}
else
{
p_base += perr * p1;
}
if (read == h2)
{
p_base += (1.0 - perr);
}
else
{
p_base += perr * p2;
}
p_base = Math.log10(p_base/2.0);
return p_base;
}
public void sort() {
Integer[] permutation = MathUtils.sortPermutation(likelihoods);
Integer[] reverse_permutation = new Integer[permutation.length];
for (int i = 0; i < reverse_permutation.length; i++) {
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
}
sorted_genotypes = MathUtils.permuteArray(genotypes, reverse_permutation);
sorted_likelihoods = MathUtils.permuteArray(likelihoods, reverse_permutation);
}
public String toString(char ref) {
this.sort();
double sum = 0;
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
for (int i = 0; i < sorted_genotypes.length; i++) {
if (i != 0) {
s = s + " ";
}
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
sum += Math.pow(10,sorted_likelihoods[i]);
}
s = s + String.format(" %f", sum);
return s;
}
public void ApplyPrior(char ref, double[] allele_likelihoods)
{
int k = 0;
for (int i = 0; i < 4; i++)
{
for (int j = i; j < 4; j++)
{
if (i == j)
{
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
}
else
{
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
public void ApplyPrior(char ref, char alt, double p_alt) {
for (int i = 0; i < genotypes.length; i++) {
if ((p_alt == -1) || (p_alt <= 1e-6)) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
} else {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
} else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) {
// hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt);
} else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) {
// het
likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0);
} else {
// something else (noise!)
likelihoods[i] += Math.log10(1e-5);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
}
}
this.sort();
}
public void ApplyWeight(double weight)
{
double log10weight = Math.log10(weight);
for (int i = 0; i < genotypes.length; i++) { likelihoods[i] += log10weight; }
this.sort();
}
// public void applySecondBaseDistributionPrior(String primaryBases, String secondaryBases) {
// for (int genotypeIndex = 0; genotypeIndex < genotypes.length; genotypeIndex++) {
// char firstAllele = genotypes[genotypeIndex].charAt(0);
// char secondAllele = genotypes[genotypeIndex].charAt(1);
//
// int offIsGenotypic = 0;
// int offTotal = 0;
//
// int onIsGenotypic = 0;
// int onTotal = 0;
//
// for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) {
// char primaryBase = primaryBases.charAt(pileupIndex);
//
// if (secondaryBases != null) {
// char secondaryBase = secondaryBases.charAt(pileupIndex);
//
// if (primaryBase != firstAllele && primaryBase != secondAllele) {
// if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
// offIsGenotypic++;
// }
// offTotal++;
// } else {
// if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
// onIsGenotypic++;
// }
// onTotal++;
// }
// }
// }
//
// double offPrior = MathUtils.binomialProbability(offIsGenotypic, offTotal, offNextBestBasePriors.get(genotypes[genotypeIndex]));
// double onPrior = MathUtils.binomialProbability(onIsGenotypic, onTotal, onNextBestBasePriors.get(genotypes[genotypeIndex]));
//
// likelihoods[genotypeIndex] += Math.log10(offPrior) + Math.log10(onPrior);
// }
// this.sort();
// }
public double LodVsNextBest() {
this.sort();
return sorted_likelihoods[0] - sorted_likelihoods[1];
}
public double LodVsRef(char ref) {
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
ref_likelihood = sorted_likelihoods[0];
return (-1.0 * this.LodVsNextBest());
} else {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
ref_likelihood = likelihoods[i];
}
}
}
return sorted_likelihoods[0] - ref_likelihood;
}
public String BestGenotype() {
this.sort();
return this.sorted_genotypes[0];
}
public double BestPosterior() {
this.sort();
return this.sorted_likelihoods[0];
}
public double RefPosterior(char ref)
{
this.LodVsRef(ref);
return this.ref_likelihood;
}
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
}

View File

@ -0,0 +1,65 @@
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
import java.lang.*;
import java.util.*;
import java.io.*;
import org.broadinstitute.sting.utils.*;
public class ConfusionMatrix
{
private double[][] ILLUMINA;
private double[][] solid;
private double[][] LS454;
public ConfusionMatrix(String file_name)
{
//System.out.println("DBG: ConfusionMatrix constructor! (" + file_name + ")");
ILLUMINA = new double[4][4];
solid = new double[4][4];
LS454 = new double[4][4];
try
{
Scanner sc = new Scanner(new File(file_name));
while (sc.hasNext())
{
String platform = sc.next();
char read = sc.next().charAt(0);
char ref = sc.next().charAt(0);
double p = sc.nextDouble();
int read_i = BaseUtils.simpleBaseToBaseIndex(read);
int ref_i = BaseUtils.simpleBaseToBaseIndex(ref);
if (platform.equals("ILLUMINA")) { ILLUMINA[read_i][ref_i] = p; }
if (platform.equals("solid")) { solid[read_i][ref_i] = p; }
if (platform.equals("LS454")) { LS454[read_i][ref_i] = p; }
//System.out.println("DBG: " + key + " " + p);
}
}
catch (Exception e)
{
e.printStackTrace();
System.exit(-1);
}
}
double lookup(String platform, char read, char truth)
{
int read_i = BaseUtils.simpleBaseToBaseIndex(read);
int truth_i = BaseUtils.simpleBaseToBaseIndex(truth);
double d = 0;
if (platform.equals("ILLUMINA")) { d = ILLUMINA[read_i][truth_i]; }
else if (platform.equals("solid")) { d = solid[read_i][truth_i]; }
else if (platform.equals("LS454")) { d = LS454[read_i][truth_i]; }
else { assert(false); }
return d;
}
}

View File

@ -0,0 +1,113 @@
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
import java.util.HashMap;
public class IndelLikelihood {
private String type;
private String[] alleles;
private double p;
private double lod;
private double pRef;
private double pHet;
private double pHom;
private String alt;
public IndelLikelihood(String type, String[] alleles, double p, double lod) {
initialize(type, alleles, p, lod);
}
public IndelLikelihood(String[] indels, double indel_alt_freq)
{
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
for (int i = 0; i < indels.length; i++) {
if (! indel_allele_counts.containsKey(indels[i])) {
indel_allele_counts.put(indels[i], 1);
} else {
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
}
}
Object[] keys = indel_allele_counts.keySet().toArray();
String[] alleles = new String[keys.length];
int[] counts = new int[keys.length];
//double likelihoods[] = new double[keys.length];
int null_count = 0;
String max_allele = null;
int max_count = -1;
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null"))))) {
for (int i = 0; i < keys.length; i++) {
Integer count = (Integer)indel_allele_counts.get(keys[i]);
alleles[i] = (String)keys[i];
counts[i] = count;
if (alleles[i].equals("null")) { null_count = counts[i]; }
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
//System.out.printf("%s[%d] ", keys[i], count);
}
//System.out.printf("\n");
double eps = 1e-3;
pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + 2*Math.log10(1-indel_alt_freq);
pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10((1-indel_alt_freq)*indel_alt_freq);
pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + 2*Math.log10(indel_alt_freq);
double lodRef = pRef - Math.max(pHet, pHom);
double lodHet = pHet - pRef;
double lodHom = pHom - pRef;
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
//System.out.printf("\n");
if (lodRef > 0) {
// reference call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = "null";
//return new IndelLikelihood("ref", genotype, pRef, lodRef);
initialize("ref", genotype, pRef, lodRef);
} else if (lodHet > lodHom) {
// het call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = max_allele;
//return new IndelLikelihood("het", genotype, pHet, lodHet);
initialize("het", genotype, pHet, lodHet);
} else {
// hom call
String[] genotype = new String[2];
genotype[0] = max_allele;
genotype[1] = max_allele;
//return new IndelLikelihood("hom", genotype, pHom, lodHom);
initialize("hom", genotype, pHom, lodHom);
}
}
}
private void initialize(String type, String[] alleles, double p, double lod) {
this.type = type;
this.alleles = alleles;
this.p = p;
this.lod = lod;
}
public String getAlt() { return alt; }
public double pRef() { return pRef; }
public double pHet() { return pHet; }
public double pHom() { return pHom; }
public String getType() { return type; }
public String[] getAlleles() { return alleles; }
public double getPosteriorProbability() { return p; }
public double getLOD() { return lod; }
public String toString() {
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
}
}