1. Newest version of the joint estimation model. Faster than previous version and now qscores can get to be > 39.8 for hets.
2. More sanity checks in annotations git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2119 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ee2abd30c4
commit
14bf6ce83c
|
|
@ -99,7 +99,13 @@ public class AlleleBalance implements VariantAnnotation {
|
|||
|
||||
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
||||
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
||||
weights.add(posteriors[bestGenotype.ordinal()]);
|
||||
double weight = posteriors[bestGenotype.ordinal()];
|
||||
|
||||
// sanity check
|
||||
if ( MathUtils.compareDoubles(weight, 0.0) == 0 )
|
||||
continue;
|
||||
|
||||
weights.add(weight);
|
||||
refBalances.add((double)refCount / (double)(refCount + altCount));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -103,7 +103,13 @@ public class OnOffGenotype implements VariantAnnotation {
|
|||
|
||||
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
||||
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
||||
weights.add(posteriors[bestGenotype.ordinal()]);
|
||||
double weight = posteriors[bestGenotype.ordinal()];
|
||||
|
||||
// sanity check
|
||||
if ( MathUtils.compareDoubles(weight, 0.0) == 0 )
|
||||
continue;
|
||||
|
||||
weights.add(weight);
|
||||
onOffBalances.add((double)onCount / (double)totalCount);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
// the GenotypeLikelihoods map
|
||||
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
|
||||
private HashMap<Character, AlleleFrequencyMatrix> AFMatrixMap = new HashMap<Character, AlleleFrequencyMatrix>();
|
||||
|
||||
private enum GenotypeType { REF, HET, HOM }
|
||||
|
||||
|
|
@ -26,10 +27,18 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
// initialize the GenotypeLikelihoods
|
||||
GLs.clear();
|
||||
AFMatrixMap.clear();
|
||||
|
||||
// for each alternate allele, create a new matrix
|
||||
for ( char alt : BaseUtils.BASES ) {
|
||||
if ( alt != ref )
|
||||
AFMatrixMap.put(alt, new AlleleFrequencyMatrix(contexts.size()));
|
||||
}
|
||||
|
||||
// use flat priors for GLs
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors();
|
||||
|
||||
int index = 0;
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
AlignmentContextBySample context = contexts.get(sample);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||
|
|
@ -38,34 +47,33 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
GL.add(pileup, true);
|
||||
GLs.put(sample, GL);
|
||||
|
||||
double[] posteriors = GL.getPosteriors();
|
||||
|
||||
// for each alternate allele, fill the matrix
|
||||
for ( char alt : BaseUtils.BASES ) {
|
||||
if ( alt != ref ) {
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], index);
|
||||
}
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
|
||||
double PofDgivenAFi = 0.0;
|
||||
// *** note that this code assumes that allele frequencies are passed in IN ORDER from 0 to 2N
|
||||
|
||||
// for each sample
|
||||
for ( GenotypeLikelihoods GL : GLs.values() ) {
|
||||
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
|
||||
|
||||
double[] posteriors = GL.getPosteriors();
|
||||
|
||||
double[] allelePosteriors = new double[] { posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
|
||||
allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);
|
||||
|
||||
// calculate the posterior weighted frequencies
|
||||
double[] HWvalues = getHardyWeinbergValues(f);
|
||||
double samplePofDgivenAFi = 0.0;
|
||||
samplePofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
|
||||
samplePofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
|
||||
samplePofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
|
||||
PofDgivenAFi += Math.log10(samplePofDgivenAFi);
|
||||
}
|
||||
|
||||
return PofDgivenAFi;
|
||||
// for any frequency other than zero, calculate the next greedy entry
|
||||
if ( MathUtils.compareDoubles(f, 0.0) != 0 )
|
||||
matrix.incrementFrequency();
|
||||
|
||||
return matrix.getLikelihoodsOfFrequency();
|
||||
}
|
||||
|
||||
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
|
||||
|
|
@ -98,4 +106,82 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
return calls;
|
||||
}
|
||||
|
||||
protected class AlleleFrequencyMatrix {
|
||||
|
||||
private double[][] matrix;
|
||||
private int[] indexes;
|
||||
private int N;
|
||||
private int frequency;
|
||||
|
||||
public AlleleFrequencyMatrix(int N) {
|
||||
this.N = N;
|
||||
frequency = 0;
|
||||
matrix = new double[N][3];
|
||||
indexes = new int[N];
|
||||
for (int i = 0; i < N; i++)
|
||||
indexes[i] = 0;
|
||||
}
|
||||
|
||||
public void setLikelihoods(double AA, double AB, double BB, int index) {
|
||||
matrix[index][GenotypeType.REF.ordinal()] = AA;
|
||||
matrix[index][GenotypeType.HET.ordinal()] = AB;
|
||||
matrix[index][GenotypeType.HOM.ordinal()] = BB;
|
||||
}
|
||||
|
||||
public void incrementFrequency() {
|
||||
if ( frequency == 2 * N )
|
||||
throw new StingException("Frequency was incremented past N; how is this possible?");
|
||||
frequency++;
|
||||
|
||||
double greedy = -1.0 * Double.MAX_VALUE;
|
||||
int greedyIndex = -1;
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
||||
if ( indexes[i] == GenotypeType.HET.ordinal() ) {
|
||||
if ( matrix[i][GenotypeType.HOM.ordinal()] - matrix[i][GenotypeType.HET.ordinal()] > greedy ) {
|
||||
greedy = matrix[i][GenotypeType.HOM.ordinal()] - matrix[i][GenotypeType.HET.ordinal()];
|
||||
greedyIndex = i;
|
||||
}
|
||||
}
|
||||
else if ( indexes[i] == GenotypeType.REF.ordinal() ) {
|
||||
if ( matrix[i][GenotypeType.HET.ordinal()] - matrix[i][GenotypeType.REF.ordinal()] > greedy ) {
|
||||
greedy = matrix[i][GenotypeType.HET.ordinal()] - matrix[i][GenotypeType.REF.ordinal()];
|
||||
greedyIndex = i;
|
||||
}
|
||||
// note that we currently don't bother with breaking ties between samples
|
||||
// (which would be done by looking at the HOM_VAR value) because it's highly
|
||||
// unlikely that a collision will both occur and that the difference will
|
||||
// be significant at HOM_VAR...
|
||||
}
|
||||
// if this person is already hom var, he can't add another alternate allele
|
||||
// so we can ignore that case
|
||||
}
|
||||
if ( greedyIndex == -1 )
|
||||
throw new StingException("There is no best choice for a new alternate allele; how is this possible?");
|
||||
|
||||
if ( indexes[greedyIndex] == GenotypeType.HET.ordinal() )
|
||||
indexes[greedyIndex] = GenotypeType.HOM.ordinal();
|
||||
else
|
||||
indexes[greedyIndex] = GenotypeType.HET.ordinal();
|
||||
}
|
||||
|
||||
public double getLikelihoodsOfFrequency() {
|
||||
double likelihoods = 0.0;
|
||||
for (int i = 0; i < N; i++)
|
||||
likelihoods += matrix[i][indexes[i]];
|
||||
|
||||
//verboseWriter.write(frequency + "\n");
|
||||
//for (int i = 0; i < N; i++) {
|
||||
// for (int j=0; j < 3; j++) {
|
||||
// verboseWriter.write(String.valueOf(matrix[i][j]));
|
||||
// verboseWriter.write(indexes[i] == j ? "* " : " ");
|
||||
// }
|
||||
// verboseWriter.write("\n");
|
||||
//}
|
||||
//verboseWriter.write(likelihoods + "\n\n");
|
||||
|
||||
return likelihoods;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue