Retiring multi-sample genotyper
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3402 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1ab00e5895
commit
a8f4ccf2f9
|
|
@ -0,0 +1,186 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: Apr 14, 2009
|
||||
* Time: 8:54:05 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
abstract public class BasicPileup {
|
||||
public static final char DELETION_CHAR = 'D';
|
||||
|
||||
@Deprecated
|
||||
abstract GenomeLoc getLocation();
|
||||
@Deprecated
|
||||
abstract char getRef();
|
||||
@Deprecated
|
||||
abstract int size();
|
||||
|
||||
|
||||
/**
|
||||
* This is the right way to get bases
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
byte[] getBases() { return null; }
|
||||
|
||||
/**
|
||||
* This is the right way to get quals
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
byte[] getQuals() { return null; }
|
||||
|
||||
/**
|
||||
* This is a terrible way to get bases. Use getBases() or getBasesAsArrayList()
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
String getBasesAsString() { return null; }
|
||||
|
||||
/**
|
||||
* This is a terrible way to get quals. Use getQuals() or getQualsAsArrayList()
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
String getQualsAsString() { return null; }
|
||||
|
||||
public String getPileupString() {
|
||||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
||||
}
|
||||
|
||||
//
|
||||
// ArrayList<Byte> methods
|
||||
//
|
||||
public static byte[] getBasesAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
byte array[] = new byte[reads.size()];
|
||||
int index = 0;
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
if ( offset == -1 ) {
|
||||
array[index++] = ((byte)DELETION_CHAR);
|
||||
} else {
|
||||
array[index++] = read.getReadBases()[offset];
|
||||
}
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||
for (byte value : getBasesAsArray(reads, offsets))
|
||||
bases.add(value);
|
||||
return bases;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for (byte value : getQualsAsArray(reads, offsets))
|
||||
quals.add(value);
|
||||
return quals;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
byte array[] = new byte[reads.size()];
|
||||
int index = 0;
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
||||
// skip deletion sites
|
||||
if ( offset == -1 ) {
|
||||
array[index++] = ((byte)0);
|
||||
} else {
|
||||
array[index++] = read.getBaseQualities()[offset];
|
||||
}
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
byte qual = (byte)read.getMappingQuality();
|
||||
quals.add(qual);
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
||||
@Deprecated // todo -- delete me
|
||||
public static String[] indelPileup( List<SAMRecord> reads, List<Integer> offsets )
|
||||
{
|
||||
String[] indels = new String[reads.size()];
|
||||
|
||||
for (int i = 0; i < reads.size(); i++)
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
Cigar cigar = read.getCigar();
|
||||
int offset = offsets.get(i);
|
||||
|
||||
String cigar_string = read.getCigarString();
|
||||
if (! (cigar_string.contains("I") || cigar_string.contains("D"))) { indels[i] = "null"; continue; }
|
||||
|
||||
//System.out.printf("%s:%d %s %s %s ", read.getReferenceName(), read.getAlignmentStart(), read.getReadName(), read.getReadString(), cigar_string);
|
||||
int k = 0;
|
||||
for (int j = 0; j < cigar.numCigarElements(); j++)
|
||||
{
|
||||
CigarOperator operator = cigar.getCigarElement(j).getOperator();
|
||||
int length = cigar.getCigarElement(j).getLength();
|
||||
if (operator == CigarOperator.M)
|
||||
{
|
||||
k += length;
|
||||
}
|
||||
else if ((k == offset+1) && (operator == CigarOperator.I))
|
||||
{
|
||||
// this insertion is associated with this offset (kinda ;) ).
|
||||
indels[i] = read.getReadString().substring(k, k+length);
|
||||
//System.out.printf("(I,%d,%d)", k, offset);
|
||||
break;
|
||||
}
|
||||
else if ((k != offset+1) && (operator == CigarOperator.I))
|
||||
{
|
||||
//System.out.printf("(i,%d,%d)", k, offset);
|
||||
k += length;
|
||||
}
|
||||
else if ((k == offset) && (operator == CigarOperator.D))
|
||||
{
|
||||
// this deletion is associated with this offset.
|
||||
indels[i] = length + "D";
|
||||
//System.out.printf("(D,%d,%d)", k, offset);
|
||||
break;
|
||||
}
|
||||
else if (k >= offset)
|
||||
{
|
||||
// no indel here.
|
||||
indels[i] = "null";
|
||||
//System.out.printf("(N,%d,%d)", k, offset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (indels[i] == null) { indels[i] = "null"; }
|
||||
//System.out.printf("\n");
|
||||
}
|
||||
|
||||
return indels;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,508 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
import java.lang.Cloneable;
|
||||
|
||||
public class ClassicGenotypeLikelihoods implements Cloneable {
|
||||
// precalculate these for performance (pow/log10 is expensive!)
|
||||
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||
|
||||
static {
|
||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||
oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0)));
|
||||
//oneMinusData[qual] = log10(1.0 - QualityUtils.qualToProb(qual));
|
||||
}
|
||||
}
|
||||
|
||||
private static double getOneMinusQual(final byte qual) {
|
||||
return oneMinusData[qual];
|
||||
}
|
||||
|
||||
private static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
|
||||
|
||||
static {
|
||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||
oneHalfMinusData[qual] = log10(0.5 - pow(10, (qual / -10.0)) / 2.0);
|
||||
//oneHalfMinusData[qual] = log10(0.5 - QualityUtils.qualToProb(qual) / 2.0);
|
||||
}
|
||||
}
|
||||
|
||||
private static double getOneHalfMinusQual(final byte qual) {
|
||||
return oneHalfMinusData[qual];
|
||||
}
|
||||
|
||||
public double[] likelihoods;
|
||||
public String[] genotypes;
|
||||
public int coverage;
|
||||
|
||||
// The genotype priors;
|
||||
private double priorHomRef;
|
||||
private double priorHet;
|
||||
private double priorHomVar;
|
||||
public String[] sorted_genotypes;
|
||||
public double[] sorted_likelihoods;
|
||||
double ref_likelihood = Double.NaN;
|
||||
private IndelLikelihood indel_likelihood;
|
||||
|
||||
// Store the 2nd-best base priors for on-genotype primary bases
|
||||
//private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
// Store the 2nd-best base priors for off-genotype primary bases
|
||||
//private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
private static double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
|
||||
private static double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
|
||||
|
||||
public ClassicGenotypeLikelihoods() {
|
||||
initialize(1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
public ClassicGenotypeLikelihoods(boolean foo) {
|
||||
}
|
||||
|
||||
public ClassicGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
|
||||
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
public ClassicGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
|
||||
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
public ClassicGenotypeLikelihoods clone() {
|
||||
ClassicGenotypeLikelihoods c = new ClassicGenotypeLikelihoods(false);
|
||||
c.likelihoods = this.likelihoods.clone();
|
||||
c.genotypes = this.genotypes.clone();
|
||||
c.coverage = this.coverage;
|
||||
|
||||
// The genotype priors;
|
||||
c.priorHomRef = this.priorHomRef;
|
||||
c.priorHet = this.priorHet;
|
||||
c.priorHomVar = this.priorHomVar;
|
||||
//public String[] sorted_genotypes;
|
||||
//public double[] sorted_likelihoods;
|
||||
//double ref_likelihood = Double.NaN;
|
||||
//private IndelLikelihood indel_likelihood;
|
||||
return c;
|
||||
}
|
||||
|
||||
private void initialize(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
|
||||
this.priorHomRef = priorHomRef;
|
||||
this.priorHet = priorHet;
|
||||
this.priorHomVar = priorHomVar;
|
||||
|
||||
likelihoods = new double[10];
|
||||
genotypes = new String[10];
|
||||
coverage = 0;
|
||||
|
||||
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
|
||||
|
||||
genotypes[0] = "AA";
|
||||
genotypes[1] = "AC";
|
||||
genotypes[2] = "AG";
|
||||
genotypes[3] = "AT";
|
||||
genotypes[4] = "CC";
|
||||
genotypes[5] = "CG";
|
||||
genotypes[6] = "CT";
|
||||
genotypes[7] = "GG";
|
||||
genotypes[8] = "GT";
|
||||
genotypes[9] = "TT";
|
||||
|
||||
//for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
// onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
||||
// offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
|
||||
//}
|
||||
}
|
||||
|
||||
public double getHomRefPrior() {
|
||||
return priorHomRef;
|
||||
}
|
||||
|
||||
public void setHomRefPrior(double priorHomRef) {
|
||||
this.priorHomRef = priorHomRef;
|
||||
}
|
||||
|
||||
public double getHetPrior() {
|
||||
return priorHet;
|
||||
}
|
||||
|
||||
public void setHetPrior(double priorHet) {
|
||||
this.priorHet = priorHet;
|
||||
}
|
||||
|
||||
public double getHomVarPrior() {
|
||||
return priorHomVar;
|
||||
}
|
||||
|
||||
public void setHomVarPrior(double priorHomVar) {
|
||||
this.priorHomVar = priorHomVar;
|
||||
}
|
||||
|
||||
// public double[] getOnGenotypeSecondaryPriors() {
|
||||
// double[] p2ndon = new double[10];
|
||||
//
|
||||
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
// p2ndon[genotypeIndex] = onNextBestBasePriors.get(genotypes[genotypeIndex]);
|
||||
// }
|
||||
//
|
||||
// return p2ndon;
|
||||
// }
|
||||
//
|
||||
// public void setOnGenotypeSecondaryPriors(double[] p2ndon) {
|
||||
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
// onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public double[] getOffGenotypeSecondaryPriors() {
|
||||
// double[] p2ndoff = new double[10];
|
||||
//
|
||||
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
// p2ndoff[genotypeIndex] = offNextBestBasePriors.get(genotypes[genotypeIndex]);
|
||||
// }
|
||||
//
|
||||
// return p2ndoff;
|
||||
// }
|
||||
//
|
||||
// public void setOffGenotypeSecondaryPriors(double[] p2ndoff) {
|
||||
// for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
// offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
|
||||
// }
|
||||
// }
|
||||
|
||||
public void add(char ref, char read, byte qual)
|
||||
{
|
||||
if (qual <= 0) { qual = 1; }
|
||||
|
||||
if (coverage == 0)
|
||||
{
|
||||
for (int i = 0; i < likelihoods.length; i++)
|
||||
{
|
||||
likelihoods[i] = 0;
|
||||
}
|
||||
}
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
||||
likelihoods[i] += likelihood;
|
||||
}
|
||||
coverage += 1;
|
||||
}
|
||||
|
||||
public void add(char ref, char read, byte qual, ConfusionMatrix matrix, String platform)
|
||||
{
|
||||
if (qual <= 0) { qual = 1; }
|
||||
if (platform == null) { platform = "ILLUMINA"; }
|
||||
if (read == 'N') { return; }
|
||||
|
||||
if (coverage == 0)
|
||||
{
|
||||
for (int i = 0; i < likelihoods.length; i++)
|
||||
{
|
||||
likelihoods[i] = 0;
|
||||
}
|
||||
}
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
char h1 = genotypes[i].charAt(0);
|
||||
char h2 = genotypes[i].charAt(1);
|
||||
|
||||
double p1 = matrix.lookup(platform, read, h1);
|
||||
double p2 = matrix.lookup(platform, read, h2);
|
||||
|
||||
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual, p1, p2);
|
||||
|
||||
//System.out.printf("DBG: %c %c %s %d %f %f %f\n", ref, read, genotypes[i], qual, p1, p2, likelihood);
|
||||
|
||||
likelihoods[i] += likelihood;
|
||||
}
|
||||
coverage += 1;
|
||||
}
|
||||
|
||||
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
|
||||
if (qual == 0) {
|
||||
qual = 1;
|
||||
} // zero quals are wrong.
|
||||
|
||||
char h1 = genotype.charAt(0);
|
||||
char h2 = genotype.charAt(1);
|
||||
|
||||
double p_base;
|
||||
|
||||
if ((h1 == h2) && (h1 == read)) {
|
||||
// hom
|
||||
p_base = getOneMinusQual(qual);
|
||||
} else if ((h1 != h2) && ((h1 == read) || (h2 == read))) {
|
||||
// het
|
||||
p_base = getOneHalfMinusQual(qual);
|
||||
} else {
|
||||
// error
|
||||
p_base = qual / -10.0;
|
||||
}
|
||||
|
||||
return p_base;
|
||||
}
|
||||
|
||||
public void TEST()
|
||||
{
|
||||
double p_A2A = 1.00;
|
||||
double p_T2T = 1.00;
|
||||
|
||||
double p_A2T = 0.75;
|
||||
double p_T2A = 0.25;
|
||||
|
||||
char ref = 'A';
|
||||
|
||||
System.out.printf("\tA\tT\n");
|
||||
System.out.printf("A\t%.02f\t%.02f\n", p_A2A, p_A2T);
|
||||
System.out.printf("T\t%.02f\t%.02f\n", p_T2A, p_T2T);
|
||||
System.out.printf("\n");
|
||||
|
||||
System.out.printf("P(A,Q20|AA) = %f\n", calculateAlleleLikelihood(ref, 'A', "AA", (byte)20, p_A2A, p_A2A));
|
||||
System.out.printf("P(A,Q20|AT) = %f\n", calculateAlleleLikelihood(ref, 'A', "AT", (byte)20, p_A2A, p_A2T));
|
||||
System.out.printf("P(A,Q20|TT) = %f\n", calculateAlleleLikelihood(ref, 'A', "TT", (byte)20, p_A2T, p_A2T));
|
||||
|
||||
System.out.printf("P(T,Q20|AA) = %f\n", calculateAlleleLikelihood(ref, 'T', "AA", (byte)20, p_T2A, p_T2A));
|
||||
System.out.printf("P(T,Q20|AT) = %f\n", calculateAlleleLikelihood(ref, 'T', "AT", (byte)20, p_T2A, p_T2T));
|
||||
System.out.printf("P(T,Q20|TT) = %f\n", calculateAlleleLikelihood(ref, 'T', "TT", (byte)20, p_T2T, p_T2T));
|
||||
|
||||
//System.exit(0);
|
||||
}
|
||||
|
||||
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual, double p1, double p2) {
|
||||
if (qual == 0) {
|
||||
qual = 1;
|
||||
} // zero quals are wrong.
|
||||
|
||||
char h1 = genotype.charAt(0);
|
||||
char h2 = genotype.charAt(1);
|
||||
|
||||
double perr = Math.pow(10.0,qual/-10.0);
|
||||
|
||||
double p_base = 0;
|
||||
|
||||
if (read == h1)
|
||||
{
|
||||
p_base += (1.0 - perr);
|
||||
}
|
||||
else
|
||||
{
|
||||
p_base += perr * p1;
|
||||
}
|
||||
|
||||
if (read == h2)
|
||||
{
|
||||
p_base += (1.0 - perr);
|
||||
}
|
||||
else
|
||||
{
|
||||
p_base += perr * p2;
|
||||
}
|
||||
|
||||
p_base = Math.log10(p_base/2.0);
|
||||
|
||||
return p_base;
|
||||
}
|
||||
|
||||
public void sort() {
|
||||
Integer[] permutation = MathUtils.sortPermutation(likelihoods);
|
||||
|
||||
Integer[] reverse_permutation = new Integer[permutation.length];
|
||||
for (int i = 0; i < reverse_permutation.length; i++) {
|
||||
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
|
||||
}
|
||||
|
||||
sorted_genotypes = MathUtils.permuteArray(genotypes, reverse_permutation);
|
||||
sorted_likelihoods = MathUtils.permuteArray(likelihoods, reverse_permutation);
|
||||
}
|
||||
|
||||
public String toString(char ref) {
|
||||
this.sort();
|
||||
double sum = 0;
|
||||
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
|
||||
for (int i = 0; i < sorted_genotypes.length; i++) {
|
||||
if (i != 0) {
|
||||
s = s + " ";
|
||||
}
|
||||
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
|
||||
sum += Math.pow(10,sorted_likelihoods[i]);
|
||||
}
|
||||
s = s + String.format(" %f", sum);
|
||||
return s;
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, double[] allele_likelihoods)
|
||||
{
|
||||
int k = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
for (int j = i; j < 4; j++)
|
||||
{
|
||||
if (i == j)
|
||||
{
|
||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
|
||||
}
|
||||
k++;
|
||||
}
|
||||
}
|
||||
this.sort();
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, char alt, double p_alt) {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if ((p_alt == -1) || (p_alt <= 1e-6)) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(priorHomRef);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(priorHomVar);
|
||||
} else {
|
||||
// het
|
||||
likelihoods[i] += Math.log10(priorHet);
|
||||
}
|
||||
if (Double.isInfinite(likelihoods[i])) {
|
||||
likelihoods[i] = -1000;
|
||||
}
|
||||
} else {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
|
||||
} else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += 2.0 * Math.log10(p_alt);
|
||||
} else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
|
||||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) {
|
||||
// het
|
||||
likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0);
|
||||
} else {
|
||||
// something else (noise!)
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
}
|
||||
|
||||
if (Double.isInfinite(likelihoods[i])) {
|
||||
likelihoods[i] = -1000;
|
||||
}
|
||||
}
|
||||
}
|
||||
this.sort();
|
||||
}
|
||||
|
||||
public void ApplyWeight(double weight)
|
||||
{
|
||||
double log10weight = Math.log10(weight);
|
||||
for (int i = 0; i < genotypes.length; i++) { likelihoods[i] += log10weight; }
|
||||
this.sort();
|
||||
}
|
||||
|
||||
// public void applySecondBaseDistributionPrior(String primaryBases, String secondaryBases) {
|
||||
// for (int genotypeIndex = 0; genotypeIndex < genotypes.length; genotypeIndex++) {
|
||||
// char firstAllele = genotypes[genotypeIndex].charAt(0);
|
||||
// char secondAllele = genotypes[genotypeIndex].charAt(1);
|
||||
//
|
||||
// int offIsGenotypic = 0;
|
||||
// int offTotal = 0;
|
||||
//
|
||||
// int onIsGenotypic = 0;
|
||||
// int onTotal = 0;
|
||||
//
|
||||
// for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) {
|
||||
// char primaryBase = primaryBases.charAt(pileupIndex);
|
||||
//
|
||||
// if (secondaryBases != null) {
|
||||
// char secondaryBase = secondaryBases.charAt(pileupIndex);
|
||||
//
|
||||
// if (primaryBase != firstAllele && primaryBase != secondAllele) {
|
||||
// if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
||||
// offIsGenotypic++;
|
||||
// }
|
||||
// offTotal++;
|
||||
// } else {
|
||||
// if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
||||
// onIsGenotypic++;
|
||||
// }
|
||||
// onTotal++;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// double offPrior = MathUtils.binomialProbability(offIsGenotypic, offTotal, offNextBestBasePriors.get(genotypes[genotypeIndex]));
|
||||
// double onPrior = MathUtils.binomialProbability(onIsGenotypic, onTotal, onNextBestBasePriors.get(genotypes[genotypeIndex]));
|
||||
//
|
||||
// likelihoods[genotypeIndex] += Math.log10(offPrior) + Math.log10(onPrior);
|
||||
// }
|
||||
// this.sort();
|
||||
// }
|
||||
|
||||
public double LodVsNextBest() {
|
||||
this.sort();
|
||||
return sorted_likelihoods[0] - sorted_likelihoods[1];
|
||||
}
|
||||
|
||||
public double LodVsRef(char ref) {
|
||||
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
|
||||
ref_likelihood = sorted_likelihoods[0];
|
||||
return (-1.0 * this.LodVsNextBest());
|
||||
} else {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
ref_likelihood = likelihoods[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
return sorted_likelihoods[0] - ref_likelihood;
|
||||
}
|
||||
|
||||
public String BestGenotype() {
|
||||
this.sort();
|
||||
return this.sorted_genotypes[0];
|
||||
}
|
||||
|
||||
public double BestPosterior() {
|
||||
this.sort();
|
||||
return this.sorted_likelihoods[0];
|
||||
}
|
||||
|
||||
public double RefPosterior(char ref)
|
||||
{
|
||||
this.LodVsRef(ref);
|
||||
return this.ref_likelihood;
|
||||
}
|
||||
|
||||
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
|
||||
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
|
||||
|
||||
import java.lang.*;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
public class ConfusionMatrix
|
||||
{
|
||||
private double[][] ILLUMINA;
|
||||
private double[][] solid;
|
||||
private double[][] LS454;
|
||||
|
||||
public ConfusionMatrix(String file_name)
|
||||
{
|
||||
//System.out.println("DBG: ConfusionMatrix constructor! (" + file_name + ")");
|
||||
|
||||
ILLUMINA = new double[4][4];
|
||||
solid = new double[4][4];
|
||||
LS454 = new double[4][4];
|
||||
|
||||
try
|
||||
{
|
||||
Scanner sc = new Scanner(new File(file_name));
|
||||
while (sc.hasNext())
|
||||
{
|
||||
String platform = sc.next();
|
||||
char read = sc.next().charAt(0);
|
||||
char ref = sc.next().charAt(0);
|
||||
double p = sc.nextDouble();
|
||||
|
||||
int read_i = BaseUtils.simpleBaseToBaseIndex(read);
|
||||
int ref_i = BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
|
||||
if (platform.equals("ILLUMINA")) { ILLUMINA[read_i][ref_i] = p; }
|
||||
if (platform.equals("solid")) { solid[read_i][ref_i] = p; }
|
||||
if (platform.equals("LS454")) { LS454[read_i][ref_i] = p; }
|
||||
|
||||
//System.out.println("DBG: " + key + " " + p);
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
double lookup(String platform, char read, char truth)
|
||||
{
|
||||
int read_i = BaseUtils.simpleBaseToBaseIndex(read);
|
||||
int truth_i = BaseUtils.simpleBaseToBaseIndex(truth);
|
||||
|
||||
double d = 0;
|
||||
|
||||
if (platform.equals("ILLUMINA")) { d = ILLUMINA[read_i][truth_i]; }
|
||||
else if (platform.equals("solid")) { d = solid[read_i][truth_i]; }
|
||||
else if (platform.equals("LS454")) { d = LS454[read_i][truth_i]; }
|
||||
else { assert(false); }
|
||||
|
||||
return d;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.multisamplecaller;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * A simple indel genotype call derived from a pileup of per-read indel
 * strings.  Each pileup entry is either "null" (the read shows no indel) or
 * an indel descriptor ("&lt;len&gt;D" for deletions, the inserted bases for
 * insertions).  The call is one of "ref", "het", or "hom"; when the pileup is
 * empty or purely reference, no call is made and the fields keep their
 * defaults (type/alleles null, probabilities and LOD 0).
 */
public class IndelLikelihood {
    private String type;        // "ref", "het", or "hom"; null when no call was made
    private String[] alleles;   // the two called alleles ("null" = reference)
    private double p;           // log10 likelihood of the winning genotype
    private double lod;         // LOD of the winning genotype vs. reference

    private double pRef;        // log10 likelihood of ref/ref
    private double pHet;        // log10 likelihood of ref/alt
    private double pHom;        // log10 likelihood of alt/alt
    // NOTE(review): never assigned anywhere in this class, so getAlt() always
    // returns null -- confirm whether any caller relies on that.
    private String alt;

    /** Directly constructs a call with the given category, alleles, likelihood, and LOD. */
    public IndelLikelihood(String type, String[] alleles, double p, double lod) {
        initialize(type, alleles, p, lod);
    }

    /**
     * Calls an indel genotype from a pileup of indel strings, assuming a
     * population alt-allele frequency of {@code indel_alt_freq} and a fixed
     * per-read error rate of 1e-3.
     *
     * @param indels         one indel descriptor per read ("null" = no indel)
     * @param indel_alt_freq prior alt allele frequency
     */
    public IndelLikelihood(String[] indels, double indel_alt_freq)
    {
        // Tally how many reads support each distinct indel string.
        HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
        for (String indel : indels) {
            Integer seen = alleleCounts.get(indel);
            alleleCounts.put(indel, seen == null ? 1 : seen + 1);
        }

        Object[] keys = alleleCounts.keySet().toArray();

        int refCount = 0;          // reads showing no indel ("null")
        String bestAllele = null;  // most frequent non-reference indel string
        int bestCount = -1;

        // Make a call only when some read shows an indel; a pileup that is
        // empty or contains nothing but "null" leaves every field at its default.
        boolean onlyReference = (keys.length == 1) && ((String) keys[0]).equals("null");
        if (keys.length > 0 && !onlyReference) {
            for (Object key : keys) {
                String allele = (String) key;
                int count = alleleCounts.get(allele);
                if (allele.equals("null")) {
                    refCount = count;
                } else if (count > bestCount) {
                    bestCount = count;
                    bestAllele = allele;
                }
            }

            // Log10 likelihoods of the three diploid states under a fixed
            // per-read miscall rate eps, plus a Hardy-Weinberg-style prior
            // in the alt frequency.
            double eps = 1e-3;
            pRef = refCount * Math.log10(1.0 - eps) + bestCount * Math.log10(eps) + 2 * Math.log10(1 - indel_alt_freq);
            pHet = refCount * Math.log10(0.5 - eps / 2) + bestCount * Math.log10(0.5 - eps / 2) + Math.log10((1 - indel_alt_freq) * indel_alt_freq);
            pHom = refCount * Math.log10(eps) + bestCount * Math.log10(1.0 - eps) + 2 * Math.log10(indel_alt_freq);

            double lodRef = pRef - Math.max(pHet, pHom);
            double lodHet = pHet - pRef;
            double lodHom = pHom - pRef;

            if (lodRef > 0) {
                // reference call
                String[] genotype = new String[2];
                genotype[0] = "null";
                genotype[1] = "null";
                initialize("ref", genotype, pRef, lodRef);
            } else if (lodHet > lodHom) {
                // het call
                String[] genotype = new String[2];
                genotype[0] = "null";
                genotype[1] = bestAllele;
                initialize("het", genotype, pHet, lodHet);
            } else {
                // hom call
                String[] genotype = new String[2];
                genotype[0] = bestAllele;
                genotype[1] = bestAllele;
                initialize("hom", genotype, pHom, lodHom);
            }
        }
    }

    /** Records the final call. */
    private void initialize(String type, String[] alleles, double p, double lod) {
        this.type = type;
        this.alleles = alleles;
        this.p = p;
        this.lod = lod;
    }

    public String getAlt() { return alt; }
    public double pRef() { return pRef; }
    public double pHet() { return pHet; }
    public double pHom() { return pHom; }

    public String getType() { return type; }
    public String[] getAlleles() { return alleles; }
    public double getPosteriorProbability() { return p; }
    public double getLOD() { return lod; }

    /** Renders "allele0/allele1 p lod". */
    public String toString() {
        return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue