Now that G's new genotyping algorithm is live, I've cleaned up the code to completely separate the grid search from the exact model. AlleleFrequencyCalculationModel is now completely abstract.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4517 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-10-18 18:04:06 +00:00
parent 80e5ac65b4
commit d54d9880d7
3 changed files with 252 additions and 325 deletions

View File

@@ -29,15 +29,9 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
import org.broad.tribble.vcf.VCFConstants;
import java.io.PrintStream;
import java.util.*;
@ -54,154 +48,14 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
GRID_SEARCH
}
// log10 margin by which the hom-ref likelihood must exceed both the het and hom-var
// likelihoods before a sample is treated as a clear reference call (see isClearRefCall)
private static final double LOG10_REFERENCE_CALL_EPSILON = 1.0;
protected int N;    // sizes the allele frequency matrix; presumably the number of samples -- TODO confirm with callers
protected AlleleFrequencyMatrix AFMatrix;    // per-frequency greedy genotype-conformation matrix
protected Set<BiallelicGenotypeLikelihoods> refCalls;    // samples short-circuited as clear reference calls
protected Logger logger;
protected PrintStream verboseWriter;
protected boolean useReferenceSampleOptimization;    // when true, clear ref calls bypass the AF matrix
// the three diploid biallelic genotype states: hom-ref (AA), het (AB), hom-var (BB)
protected enum GenotypeType { AA, AB, BB }
/**
 * Base constructor for allele frequency calculation models.
 *
 * @param N                              sizes the allele frequency matrix; presumably the number of samples -- TODO confirm
 * @param logger                         logger for status output
 * @param verboseWriter                  stream for verbose output
 * @param useReferenceSampleOptimization if true, confident reference calls are diverted into
 *                                       refCalls instead of being loaded into the AF matrix
 */
protected AlleleFrequencyCalculationModel(int N, Logger logger, PrintStream verboseWriter, boolean useReferenceSampleOptimization) {
this.N = N;
this.logger = logger;
this.verboseWriter = verboseWriter;
this.useReferenceSampleOptimization = useReferenceSampleOptimization;
AFMatrix = new AlleleFrequencyMatrix(N);
refCalls = new HashSet<BiallelicGenotypeLikelihoods>();
}
/**
 * Fills in the log10 probability of the data for each candidate alternate allele frequency.
 * Must be overridden by concrete subclasses.
 *
 * @param tracker rod data
 * @param ref reference context
 * @param GLs genotype likelihoods, keyed by sample name
 * @param log10AlleleFrequencyPriors priors
 * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
 * @param minFrequencyToCalculate the minimum frequency which needs to be calculated
 */
public abstract void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors,
int minFrequencyToCalculate);
/**
 * Assigns genotypes to samples at the maximum-likelihood allele frequency by walking
 * the AF matrix up to that frequency. Can be overridden by concrete subclasses.
 *
 * @param contexts alignment contexts, keyed by sample name
 * @param GLs genotype likelihoods, keyed by sample name
 * @param log10AlleleFrequencyPosteriors allele frequency results (not read by this
 *        implementation; available for subclasses)
 * @param AFofMaxLikelihood allele frequency of max likelihood
 *
 * @return map from sample name to called genotype
 */
public Map<String, Genotype> assignGenotypes(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
initializeAFMatrix(GLs);
// increment the grid
for (int i = 1; i <= AFofMaxLikelihood; i++) {
// add one more alternate allele
AFMatrix.incrementFrequency();
}
return generateCalls(contexts, GLs, AFofMaxLikelihood);
}
/**
 * Produces the final per-sample genotype calls at the given allele frequency:
 * samples in the AF matrix get the genotype recorded for that frequency, and
 * samples previously short-circuited as clear reference calls get hom-ref calls.
 *
 * @param contexts  alignment contexts, keyed by sample name
 * @param GLs       genotype likelihoods, keyed by sample name
 * @param frequency the alternate allele frequency at which to extract genotypes
 * @return map from sample name to called genotype
 */
protected Map<String, Genotype> generateCalls(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
int frequency) {
HashMap<String, Genotype> calls = new HashMap<String, Genotype>();
// first, the potential alt calls
for ( String sample : AFMatrix.getSamples() ) {
BiallelicGenotypeLikelihoods GL = GLs.get(sample);
Allele alleleA = GL.getAlleleA();
Allele alleleB = GL.getAlleleB();
// set the genotype and confidence
Pair<Integer, Double> AFbasedGenotype = AFMatrix.getGenotype(frequency, sample);
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) {
myAlleles.add(alleleA);
myAlleles.add(alleleA);
} else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) {
myAlleles.add(alleleA);
myAlleles.add(alleleB);
} else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() )
myAlleles.add(alleleB);
myAlleles.add(alleleB);
}
// attach depth and genotype-likelihood annotations to the call
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false));
}
// now, the clearly ref calls
for ( BiallelicGenotypeLikelihoods GL : refCalls ) {
String sample = GL.getSample();
Allele ref = GL.getAlleleA();
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
myAlleles.add(ref);
myAlleles.add(ref);
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
// confidence = margin of the hom-ref likelihood over the best non-ref alternative
double GQ = GL.getAALikelihoods() - Math.max(GL.getABLikelihoods(), GL.getBBLikelihoods());
calls.put(sample, new Genotype(sample, myAlleles, GQ, null, attributes, false));
}
return calls;
}
/**
 * Loads every sample's genotype posteriors into the AF matrix, diverting
 * confident reference calls into refCalls when the optimization is enabled.
 */
protected void initializeAFMatrix(Map<String, BiallelicGenotypeLikelihoods> GLs) {
    refCalls.clear();
    AFMatrix.clear();
    for ( BiallelicGenotypeLikelihoods sampleGL : GLs.values() ) {
        final boolean shortCircuitAsRef = useReferenceSampleOptimization && isClearRefCall(sampleGL);
        if ( shortCircuitAsRef )
            refCalls.add(sampleGL);
        else
            AFMatrix.setLikelihoods(sampleGL.getPosteriors(), sampleGL.getSample());
    }
}
/**
 * A sample is a clear reference call when its A allele is the reference and its
 * hom-ref likelihood beats both other genotypes by at least the configured epsilon.
 */
private boolean isClearRefCall(BiallelicGenotypeLikelihoods GL) {
    if ( GL.getAlleleA().isNonReference() )
        return false;
    final double[] gl = GL.getLikelihoods();
    // hom-ref likelihood discounted by the required confidence margin
    final double threshold = gl[0] - LOG10_REFERENCE_CALL_EPSILON;
    return threshold > gl[1] && threshold > gl[2];
}
/** Counts the pileup elements considered usable for genotyping (the DP annotation). */
private int getUnfilteredDepth(ReadBackedPileup pileup) {
    int depth = 0;
    for ( PileupElement element : pileup ) {
        if ( !DiploidSNPGenotypeLikelihoods.usableBase(element, true) )
            continue;   // skip bases that fail the usability filter
        depth++;
    }
    return depth;
}
protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
protected class CalculatedAlleleFrequency {
@ -214,157 +68,49 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
}
}
protected enum GenotypeType { AA, AB, BB }
protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
/**
 * Base constructor.
 *
 * @param N             sizes model-specific data structures; presumably the number of samples -- TODO confirm
 * @param logger        logger for status output
 * @param verboseWriter stream for verbose output
 */
protected AlleleFrequencyCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
this.N = N;
this.logger = logger;
this.verboseWriter = verboseWriter;
}
protected static class AlleleFrequencyMatrix {
/**
* Must be overridden by concrete subclasses
* @param tracker rod data
* @param ref reference context
* @param GLs genotype likelihoods
* @param log10AlleleFrequencyPriors priors
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
* @param minFrequencyToCalculate the minimum frequency which needs to be calculated
*/
protected abstract void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors,
int minFrequencyToCalculate);
private double[][] matrix; // allele frequency matrix
private int[] indexes; // matrix to maintain which genotype is active
private int maxN; // total possible frequencies in data
private int frequency; // current frequency
/**
 * Assigns genotypes to samples at the maximum-likelihood allele frequency.
 * Must be implemented by concrete subclasses (the method is abstract).
 *
 * @param contexts alignment contexts
 * @param GLs genotype likelihoods
 * @param log10AlleleFrequencyPosteriors allele frequency results
 * @param AFofMaxLikelihood allele frequency of max likelihood
 *
 * @return calls
 */
protected abstract Map<String, Genotype> assignGenotypes(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood);
// data structures necessary to maintain a list of the best genotypes and their scores
private ArrayList<String> samples = new ArrayList<String>();
private HashMap<Integer, HashMap<String, Pair<Integer, Double>>> samplesToGenotypesPerAF = new HashMap<Integer, HashMap<String, Pair<Integer, Double>>>();
public AlleleFrequencyMatrix(int N) {
maxN = N;
matrix = new double[N][3];
indexes = new int[N];
clear();
protected int getUnfilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for ( PileupElement p : pileup ) {
if ( DiploidSNPGenotypeLikelihoods.usableBase(p, true) )
count++;
}
public List<String> getSamples() { return samples; }
/** Resets the matrix to allele frequency 0 with every sample back in the first (AA) state. */
public void clear() {
    frequency = 0;
    Arrays.fill(indexes, 0);
    samples.clear();
    samplesToGenotypesPerAF.clear();
}
/** Registers a sample's three genotype likelihoods (AA, AB, BB) in the next free row. */
public void setLikelihoods(double AA, double AB, double BB, String sample) {
    final int row = samples.size();     // rows are assigned in sample-insertion order
    samples.add(sample);
    final double[] entry = matrix[row];
    entry[GenotypeType.AA.ordinal()] = AA;
    entry[GenotypeType.AB.ordinal()] = AB;
    entry[GenotypeType.BB.ordinal()] = BB;
}
/** Registers a sample's likelihoods from an array ordered [AA, AB, BB]. */
public void setLikelihoods(double[] GLs, String sample) {
    // delegate to the scalar overload; array order matches the GenotypeType ordinals
    setLikelihoods(GLs[0], GLs[1], GLs[2], sample);
}
/**
 * Advances the matrix by one alternate allele: greedily promotes the single sample
 * for which moving up one genotype state (AA->AB or AB->BB) costs the least likelihood.
 *
 * @throws ReviewedStingException if the frequency is already at its 2N maximum or no
 *         sample can accept another alternate allele
 */
public void incrementFrequency() {
    final int N = samples.size();
    // N diploid samples carry 2N chromosomes, so 2N is the maximum alternate allele count
    // (the old message said "past N", which contradicted the 2*N guard below)
    if ( frequency == 2 * N )
        throw new ReviewedStingException("Frequency was incremented past 2N; how is this possible?");
    frequency++;
    double greedy = VALUE_NOT_CALCULATED;
    int greedyIndex = -1;
    for (int i = 0; i < N; i++) {
        if ( indexes[i] == GenotypeType.AB.ordinal() ) {
            if ( matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()] > greedy ) {
                greedy = matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()];
                greedyIndex = i;
            }
        }
        else if ( indexes[i] == GenotypeType.AA.ordinal() ) {
            if ( matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()] > greedy ) {
                greedy = matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()];
                greedyIndex = i;
            }
            // note that we currently don't bother with breaking ties between samples
            // (which would be done by looking at the HOM_VAR value) because it's highly
            // unlikely that a collision will both occur and that the difference will
            // be significant at HOM_VAR...
        }
        // if this person is already hom var, he can't add another alternate allele
        // so we can ignore that case
    }
    if ( greedyIndex == -1 )
        throw new ReviewedStingException("There is no best choice for a new alternate allele; how is this possible?");
    if ( indexes[greedyIndex] == GenotypeType.AB.ordinal() )
        indexes[greedyIndex] = GenotypeType.BB.ordinal();
    else
        indexes[greedyIndex] = GenotypeType.AB.ordinal();
}
/**
 * Sums each sample's likelihood for its currently-active genotype, i.e. the joint
 * log10 likelihood of the current genotype conformation, and caches that conformation
 * so getGenotype() can retrieve it for this frequency later.
 * (A commented-out debug-printing block was removed here as dead code.)
 *
 * @return joint log10 likelihood of the data at the current allele frequency
 */
public double getLikelihoodsOfFrequency() {
    double likelihoods = 0.0;
    final int N = samples.size();
    for (int i = 0; i < N; i++)
        likelihoods += matrix[i][indexes[i]];
    // record the genotype assignments backing this likelihood for later retrieval
    recordGenotypes();
    return likelihoods;
}
/**
 * Returns the (genotype ordinal, confidence score) pair recorded for the given sample
 * at the given allele frequency, lazily recording the current conformation if needed.
 */
public Pair<Integer, Double> getGenotype(int frequency, String sample) {
// it's possible that the genotype conformation hash wasn't initialized
if ( samplesToGenotypesPerAF.get(frequency) == null ) {
// confirm that the current frequency is the target one
if ( frequency != this.frequency )
throw new IllegalStateException("Attempting to retrieve genotypes from an uninitialized hash for frequency " + frequency + " (the hash is current standing at frequency " + this.frequency + ")");
recordGenotypes();
}
return samplesToGenotypesPerAF.get(frequency).get(sample);
}
/**
 * Computes and caches, for the current frequency, each sample's active genotype together
 * with a confidence score: the log10 margin over the next-best genotype when the active
 * genotype is the per-sample maximum, otherwise a probability-derived score.
 */
private void recordGenotypes() {
HashMap<String, Pair<Integer, Double>> samplesToGenotypes = new HashMap<String, Pair<Integer, Double>>();
int index = 0;
for ( String sample : samples ) {
int genotype = indexes[index];
double score;
int maxEntry = MathUtils.maxElementIndex(matrix[index]);
// if the max value is for the most likely genotype, we can compute next vs. next best
if ( genotype == maxEntry ) {
if ( genotype == GenotypeType.AA.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AB.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
else if ( genotype == GenotypeType.AB.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
else // ( genotype == GenotypeType.BB.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.AB.ordinal()]);
}
// otherwise, we need to calculate the probability of the genotype
else {
// NOTE(review): assumes normalizeFromLog10 returns real-space probabilities -- confirm
double[] normalized = MathUtils.normalizeFromLog10(matrix[index]);
double chosenGenotype = normalized[genotype];
score = -1.0 * Math.log10(1.0 - chosenGenotype);
}
samplesToGenotypes.put(sample, new Pair<Integer, Double>(genotype, Math.abs(score)));
index++;
}
samplesToGenotypesPerAF.put(frequency, samplesToGenotypes);
}
return count;
}
}

View File

@ -46,7 +46,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static final double LOGEPS = -300;
protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
super(N, logger, verboseWriter, true);
super(N, logger, verboseWriter);
}
public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -151,25 +151,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
// overriding implementation in AlleleFrequencyCalculationModel to avoid filling out AF matrix which is not used.
return generateCalls(contexts, GLs, AFofMaxLikelihood);
}
protected Map<String, Genotype> generateCalls(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
int bestAFguess) {
HashMap<String, Genotype> calls = new HashMap<String, Genotype>();
HashMap<String, Genotype> calls = new HashMap<String, Genotype>();
double[][] pathMetricArray = new double[GLs.size()+1][bestAFguess+1];
int[][] tracebackArray = new int[GLs.size()+1][bestAFguess+1];
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1];
ArrayList<String> sampleIndices = new ArrayList<String>();
int sampleIdx = 0;
// todo - optimize initialization
for (int k=0; k <= bestAFguess; k++)
for (int k=0; k <= AFofMaxLikelihood; k++)
for (int j=0; j <= GLs.size(); j++)
pathMetricArray[j][k] = -1e30;
@ -186,7 +178,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double[] likelihoods = GLs.get(sample).getLikelihoods();
for (int k=0; k <= bestAFguess; k++) {
for (int k=0; k <= AFofMaxLikelihood; k++) {
double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0];
int bestIndex = k;
@ -214,7 +206,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
}
int startIdx = bestAFguess;
int startIdx = AFofMaxLikelihood;
for (int k = sampleIdx; k > 0; k--) {
int bestGTguess;
String sample = sampleIndices.get(k-1);
@ -263,7 +255,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
return calls;
}

View File

@ -27,9 +27,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.*;
import java.io.PrintStream;
@ -40,17 +46,19 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
// how much off from the max likelihoods do we need to be before we can quit calculating?
protected static final double LOG10_OPTIMIZATION_EPSILON = 8.0;
private AlleleFrequencyMatrix AFMatrix;
protected GridSearchAFEstimation(int N, Logger logger, PrintStream verboseWriter) {
super(N, logger, verboseWriter, false);
super(N, logger, verboseWriter);
AFMatrix = new AlleleFrequencyMatrix(N);
}
public void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors,
int minFrequencyToCalculate) {
protected void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors,
int minFrequencyToCalculate) {
initializeAFMatrix(GLs);
// first, calculate for AF=0 (no change to matrix)
@ -90,10 +98,192 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
*
* @return calls
*/
public Map<String, Genotype> assignGenotypes(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
return generateCalls(contexts, GLs, AFofMaxLikelihood);
/**
 * Assigns each sample the genotype recorded in the AF matrix for the
 * maximum-likelihood allele frequency.
 *
 * @param contexts alignment contexts, keyed by sample name
 * @param GLs genotype likelihoods, keyed by sample name
 * @param log10AlleleFrequencyPosteriors allele frequency results (not read by this implementation)
 * @param AFofMaxLikelihood allele frequency of max likelihood
 * @return map from sample name to called genotype
 */
protected Map<String, Genotype> assignGenotypes(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
HashMap<String, Genotype> calls = new HashMap<String, Genotype>();
// first, the potential alt calls
for ( String sample : AFMatrix.getSamples() ) {
BiallelicGenotypeLikelihoods GL = GLs.get(sample);
Allele alleleA = GL.getAlleleA();
Allele alleleB = GL.getAlleleB();
// set the genotype and confidence
Pair<Integer, Double> AFbasedGenotype = AFMatrix.getGenotype(AFofMaxLikelihood, sample);
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) {
myAlleles.add(alleleA);
myAlleles.add(alleleA);
} else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) {
myAlleles.add(alleleA);
myAlleles.add(alleleB);
} else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() )
myAlleles.add(alleleB);
myAlleles.add(alleleB);
}
// attach depth and genotype-likelihood annotations to the call
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false));
}
return calls;
}
/** Clears the AF matrix and loads every sample's genotype posteriors into it. */
private void initializeAFMatrix(Map<String, BiallelicGenotypeLikelihoods> GLs) {
    AFMatrix.clear();
    for ( BiallelicGenotypeLikelihoods sampleGL : GLs.values() ) {
        AFMatrix.setLikelihoods(sampleGL.getPosteriors(), sampleGL.getSample());
    }
}
protected static class AlleleFrequencyMatrix {
private double[][] matrix; // allele frequency matrix
private int[] indexes; // matrix to maintain which genotype is active
private int maxN; // total possible frequencies in data
private int frequency; // current frequency
// data structures necessary to maintain a list of the best genotypes and their scores
private ArrayList<String> samples = new ArrayList<String>();
private HashMap<Integer, HashMap<String, Pair<Integer, Double>>> samplesToGenotypesPerAF = new HashMap<Integer, HashMap<String, Pair<Integer, Double>>>();
/**
 * @param N number of rows to allocate, one per sample
 */
public AlleleFrequencyMatrix(int N) {
maxN = N;
matrix = new double[N][3];    // one row per sample, one column per GenotypeType state
indexes = new int[N];    // active genotype state per sample (GenotypeType ordinals)
clear();
}
public List<String> getSamples() { return samples; }
/** Resets the matrix to allele frequency 0 with every sample back in the first (AA) state. */
public void clear() {
    frequency = 0;
    Arrays.fill(indexes, 0);
    samples.clear();
    samplesToGenotypesPerAF.clear();
}
/** Registers a sample's three genotype likelihoods (AA, AB, BB) in the next free row. */
public void setLikelihoods(double AA, double AB, double BB, String sample) {
    final int row = samples.size();     // rows are assigned in sample-insertion order
    samples.add(sample);
    final double[] entry = matrix[row];
    entry[GenotypeType.AA.ordinal()] = AA;
    entry[GenotypeType.AB.ordinal()] = AB;
    entry[GenotypeType.BB.ordinal()] = BB;
}
/** Registers a sample's likelihoods from an array ordered [AA, AB, BB]. */
public void setLikelihoods(double[] GLs, String sample) {
    // delegate to the scalar overload; array order matches the GenotypeType ordinals
    setLikelihoods(GLs[0], GLs[1], GLs[2], sample);
}
/**
 * Advances the matrix by one alternate allele: greedily promotes the single sample
 * for which moving up one genotype state (AA->AB or AB->BB) costs the least likelihood.
 *
 * @throws ReviewedStingException if the frequency is already at its 2N maximum or no
 *         sample can accept another alternate allele
 */
public void incrementFrequency() {
    final int N = samples.size();
    // N diploid samples carry 2N chromosomes, so 2N is the maximum alternate allele count
    // (the old message said "past N", which contradicted the 2*N guard below)
    if ( frequency == 2 * N )
        throw new ReviewedStingException("Frequency was incremented past 2N; how is this possible?");
    frequency++;
    double greedy = VALUE_NOT_CALCULATED;
    int greedyIndex = -1;
    for (int i = 0; i < N; i++) {
        if ( indexes[i] == GenotypeType.AB.ordinal() ) {
            if ( matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()] > greedy ) {
                greedy = matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()];
                greedyIndex = i;
            }
        }
        else if ( indexes[i] == GenotypeType.AA.ordinal() ) {
            if ( matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()] > greedy ) {
                greedy = matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()];
                greedyIndex = i;
            }
            // note that we currently don't bother with breaking ties between samples
            // (which would be done by looking at the HOM_VAR value) because it's highly
            // unlikely that a collision will both occur and that the difference will
            // be significant at HOM_VAR...
        }
        // if this person is already hom var, he can't add another alternate allele
        // so we can ignore that case
    }
    if ( greedyIndex == -1 )
        throw new ReviewedStingException("There is no best choice for a new alternate allele; how is this possible?");
    if ( indexes[greedyIndex] == GenotypeType.AB.ordinal() )
        indexes[greedyIndex] = GenotypeType.BB.ordinal();
    else
        indexes[greedyIndex] = GenotypeType.AB.ordinal();
}
/**
 * Sums each sample's likelihood for its currently-active genotype, i.e. the joint
 * log10 likelihood of the current genotype conformation, and caches that conformation
 * so getGenotype() can retrieve it for this frequency later.
 * (A commented-out debug-printing block was removed here as dead code.)
 *
 * @return joint log10 likelihood of the data at the current allele frequency
 */
public double getLikelihoodsOfFrequency() {
    double likelihoods = 0.0;
    final int N = samples.size();
    for (int i = 0; i < N; i++)
        likelihoods += matrix[i][indexes[i]];
    // record the genotype assignments backing this likelihood for later retrieval
    recordGenotypes();
    return likelihoods;
}
/**
 * Returns the (genotype ordinal, confidence score) pair recorded for the given sample
 * at the given allele frequency.
 *
 * The lazy-initialization guard below existed in the version of this class that was
 * moved out of AlleleFrequencyCalculationModel but was dropped during the move;
 * without it this method NPEs when the cache for the requested frequency was never
 * recorded. Restored for safety.
 */
public Pair<Integer, Double> getGenotype(int frequency, String sample) {
    // it's possible that the genotype conformation hash wasn't initialized
    if ( samplesToGenotypesPerAF.get(frequency) == null ) {
        // confirm that the current frequency is the target one
        if ( frequency != this.frequency )
            throw new IllegalStateException("Attempting to retrieve genotypes from an uninitialized hash for frequency " + frequency + " (the hash is currently standing at frequency " + this.frequency + ")");
        recordGenotypes();
    }
    return samplesToGenotypesPerAF.get(frequency).get(sample);
}
/**
 * Computes and caches, for the current frequency, each sample's active genotype together
 * with a confidence score: the log10 margin over the next-best genotype when the active
 * genotype is the per-sample maximum, otherwise a probability-derived score.
 */
private void recordGenotypes() {
HashMap<String, Pair<Integer, Double>> samplesToGenotypes = new HashMap<String, Pair<Integer, Double>>();
int index = 0;
for ( String sample : samples ) {
int genotype = indexes[index];
double score;
int maxEntry = MathUtils.maxElementIndex(matrix[index]);
// if the max value is for the most likely genotype, we can compute next vs. next best
if ( genotype == maxEntry ) {
if ( genotype == GenotypeType.AA.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AB.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
else if ( genotype == GenotypeType.AB.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
else // ( genotype == GenotypeType.BB.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.AB.ordinal()]);
}
// otherwise, we need to calculate the probability of the genotype
else {
// NOTE(review): assumes normalizeFromLog10 returns real-space probabilities -- confirm
double[] normalized = MathUtils.normalizeFromLog10(matrix[index]);
double chosenGenotype = normalized[genotype];
score = -1.0 * Math.log10(1.0 - chosenGenotype);
}
samplesToGenotypes.put(sample, new Pair<Integer, Double>(genotype, Math.abs(score)));
index++;
}
samplesToGenotypesPerAF.put(frequency, samplesToGenotypes);
}
}
}