1. Newest version of the joint estimation model. It is faster than the previous version, and Q scores for hets can now exceed 39.8.
2. More sanity checks in annotations.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2119 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ee2abd30c4
commit
14bf6ce83c
|
|
@ -99,7 +99,13 @@ public class AlleleBalance implements VariantAnnotation {
|
||||||
|
|
||||||
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
||||||
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
||||||
weights.add(posteriors[bestGenotype.ordinal()]);
|
double weight = posteriors[bestGenotype.ordinal()];
|
||||||
|
|
||||||
|
// sanity check
|
||||||
|
if ( MathUtils.compareDoubles(weight, 0.0) == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
weights.add(weight);
|
||||||
refBalances.add((double)refCount / (double)(refCount + altCount));
|
refBalances.add((double)refCount / (double)(refCount + altCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,13 @@ public class OnOffGenotype implements VariantAnnotation {
|
||||||
|
|
||||||
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
||||||
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
||||||
weights.add(posteriors[bestGenotype.ordinal()]);
|
double weight = posteriors[bestGenotype.ordinal()];
|
||||||
|
|
||||||
|
// sanity check
|
||||||
|
if ( MathUtils.compareDoubles(weight, 0.0) == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
weights.add(weight);
|
||||||
onOffBalances.add((double)onCount / (double)totalCount);
|
onOffBalances.add((double)onCount / (double)totalCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
|
|
||||||
// the GenotypeLikelihoods map
|
// the GenotypeLikelihoods map
|
||||||
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
|
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
|
||||||
|
private HashMap<Character, AlleleFrequencyMatrix> AFMatrixMap = new HashMap<Character, AlleleFrequencyMatrix>();
|
||||||
|
|
||||||
private enum GenotypeType { REF, HET, HOM }
|
private enum GenotypeType { REF, HET, HOM }
|
||||||
|
|
||||||
|
|
@ -26,10 +27,18 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||||
// initialize the GenotypeLikelihoods
|
// initialize the GenotypeLikelihoods
|
||||||
GLs.clear();
|
GLs.clear();
|
||||||
|
AFMatrixMap.clear();
|
||||||
|
|
||||||
|
// for each alternate allele, create a new matrix
|
||||||
|
for ( char alt : BaseUtils.BASES ) {
|
||||||
|
if ( alt != ref )
|
||||||
|
AFMatrixMap.put(alt, new AlleleFrequencyMatrix(contexts.size()));
|
||||||
|
}
|
||||||
|
|
||||||
// use flat priors for GLs
|
// use flat priors for GLs
|
||||||
DiploidGenotypePriors priors = new DiploidGenotypePriors();
|
DiploidGenotypePriors priors = new DiploidGenotypePriors();
|
||||||
|
|
||||||
|
int index = 0;
|
||||||
for ( String sample : contexts.keySet() ) {
|
for ( String sample : contexts.keySet() ) {
|
||||||
AlignmentContextBySample context = contexts.get(sample);
|
AlignmentContextBySample context = contexts.get(sample);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||||
|
|
@ -38,34 +47,33 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
GL.add(pileup, true);
|
GL.add(pileup, true);
|
||||||
GLs.put(sample, GL);
|
GLs.put(sample, GL);
|
||||||
|
|
||||||
|
double[] posteriors = GL.getPosteriors();
|
||||||
|
|
||||||
|
// for each alternate allele, fill the matrix
|
||||||
|
for ( char alt : BaseUtils.BASES ) {
|
||||||
|
if ( alt != ref ) {
|
||||||
|
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||||
|
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
|
||||||
|
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||||
|
AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
index++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
|
||||||
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
|
|
||||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
|
||||||
|
|
||||||
double PofDgivenAFi = 0.0;
|
// *** note that this code assumes that allele frequencies are passed in IN ORDER from 0 to 2N
|
||||||
|
|
||||||
// for each sample
|
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
|
||||||
for ( GenotypeLikelihoods GL : GLs.values() ) {
|
|
||||||
|
|
||||||
double[] posteriors = GL.getPosteriors();
|
// for any frequency other than zero, calculate the next greedy entry
|
||||||
|
if ( MathUtils.compareDoubles(f, 0.0) != 0 )
|
||||||
|
matrix.incrementFrequency();
|
||||||
|
|
||||||
double[] allelePosteriors = new double[] { posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
|
return matrix.getLikelihoodsOfFrequency();
|
||||||
allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);
|
|
||||||
|
|
||||||
// calculate the posterior weighted frequencies
|
|
||||||
double[] HWvalues = getHardyWeinbergValues(f);
|
|
||||||
double samplePofDgivenAFi = 0.0;
|
|
||||||
samplePofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
|
|
||||||
samplePofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
|
|
||||||
samplePofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
|
|
||||||
PofDgivenAFi += Math.log10(samplePofDgivenAFi);
|
|
||||||
}
|
|
||||||
|
|
||||||
return PofDgivenAFi;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
|
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
|
||||||
|
|
@ -98,4 +106,82 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
|
|
||||||
return calls;
|
return calls;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected class AlleleFrequencyMatrix {
|
||||||
|
|
||||||
|
private double[][] matrix;
|
||||||
|
private int[] indexes;
|
||||||
|
private int N;
|
||||||
|
private int frequency;
|
||||||
|
|
||||||
|
public AlleleFrequencyMatrix(int N) {
|
||||||
|
this.N = N;
|
||||||
|
frequency = 0;
|
||||||
|
matrix = new double[N][3];
|
||||||
|
indexes = new int[N];
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
indexes[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLikelihoods(double AA, double AB, double BB, int index) {
|
||||||
|
matrix[index][GenotypeType.REF.ordinal()] = AA;
|
||||||
|
matrix[index][GenotypeType.HET.ordinal()] = AB;
|
||||||
|
matrix[index][GenotypeType.HOM.ordinal()] = BB;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementFrequency() {
|
||||||
|
if ( frequency == 2 * N )
|
||||||
|
throw new StingException("Frequency was incremented past N; how is this possible?");
|
||||||
|
frequency++;
|
||||||
|
|
||||||
|
double greedy = -1.0 * Double.MAX_VALUE;
|
||||||
|
int greedyIndex = -1;
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
|
||||||
|
if ( indexes[i] == GenotypeType.HET.ordinal() ) {
|
||||||
|
if ( matrix[i][GenotypeType.HOM.ordinal()] - matrix[i][GenotypeType.HET.ordinal()] > greedy ) {
|
||||||
|
greedy = matrix[i][GenotypeType.HOM.ordinal()] - matrix[i][GenotypeType.HET.ordinal()];
|
||||||
|
greedyIndex = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( indexes[i] == GenotypeType.REF.ordinal() ) {
|
||||||
|
if ( matrix[i][GenotypeType.HET.ordinal()] - matrix[i][GenotypeType.REF.ordinal()] > greedy ) {
|
||||||
|
greedy = matrix[i][GenotypeType.HET.ordinal()] - matrix[i][GenotypeType.REF.ordinal()];
|
||||||
|
greedyIndex = i;
|
||||||
|
}
|
||||||
|
// note that we currently don't bother with breaking ties between samples
|
||||||
|
// (which would be done by looking at the HOM_VAR value) because it's highly
|
||||||
|
// unlikely that a collision will both occur and that the difference will
|
||||||
|
// be significant at HOM_VAR...
|
||||||
|
}
|
||||||
|
// if this person is already hom var, he can't add another alternate allele
|
||||||
|
// so we can ignore that case
|
||||||
|
}
|
||||||
|
if ( greedyIndex == -1 )
|
||||||
|
throw new StingException("There is no best choice for a new alternate allele; how is this possible?");
|
||||||
|
|
||||||
|
if ( indexes[greedyIndex] == GenotypeType.HET.ordinal() )
|
||||||
|
indexes[greedyIndex] = GenotypeType.HOM.ordinal();
|
||||||
|
else
|
||||||
|
indexes[greedyIndex] = GenotypeType.HET.ordinal();
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getLikelihoodsOfFrequency() {
|
||||||
|
double likelihoods = 0.0;
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
likelihoods += matrix[i][indexes[i]];
|
||||||
|
|
||||||
|
//verboseWriter.write(frequency + "\n");
|
||||||
|
//for (int i = 0; i < N; i++) {
|
||||||
|
// for (int j=0; j < 3; j++) {
|
||||||
|
// verboseWriter.write(String.valueOf(matrix[i][j]));
|
||||||
|
// verboseWriter.write(indexes[i] == j ? "* " : " ");
|
||||||
|
// }
|
||||||
|
// verboseWriter.write("\n");
|
||||||
|
//}
|
||||||
|
//verboseWriter.write(likelihoods + "\n\n");
|
||||||
|
|
||||||
|
return likelihoods;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue