Combined Scores, bug fixed for printing HLA-C

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1685 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
sjia 2009-09-22 18:28:16 +00:00
parent 682b765536
commit 22932042ea
1 changed files with 226 additions and 108 deletions

View File

@ -36,12 +36,14 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
public boolean suppressPrinting = false;
//String HLAdatabaseFile = "/Users/shermanjia/Work/HLA.sam";
//String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.sam";
String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.4digitUnique.sam";
//String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.sam";
String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.4digitUnique.sam";
//String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.4digitUnique.sam";
//String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/CEU_Founders_HLA.freq";
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
ArrayList<String> HLAreads = new ArrayList<String>();
@ -59,7 +61,8 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
double[] SingleAlleleFrequencies;
double[][] LOD; String[][] Alleles;
double[][] LikelihoodScores;
int[][] PhasingScores;
double[][] PhasingScores;
double[][]PhasingProbabilities;
double[][] CombinedAlleleFrequencies;
int j1, k1, j2, k2;
int iAstart = -1, iAstop = -1, iBstart = -1, iBstop = -1, iCstart = -1, iCstop = -1;
@ -70,9 +73,12 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
Hashtable Scores = new Hashtable();
Hashtable SNPs = new Hashtable();
ArrayList<String> SNPchars = new ArrayList<String>();
ArrayList<Integer> SNPlocations = new ArrayList<Integer>();
Integer SNPcount = 0;
int[][] SNPcorrelation;
double[][] SNPcorrelationProb;
String[][] SNPhaplotypes;
boolean DatabaseLoaded = false;
boolean PrintedOutput = false;
@ -116,7 +122,8 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
SingleAlleleFrequencies = new double[n];
LOD = new double[n][n];
LikelihoodScores = new double[n][n];
PhasingScores = new int[n][n];
PhasingScores = new double[n][n];
PhasingProbabilities = new double[n][n];
CombinedAlleleFrequencies = new double[n][n];
for (i = 0; i < n; i++){
@ -128,17 +135,18 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
for (int j = 0; j <n; j++){
LOD[i][j]=0;
LikelihoodScores[i][j]=0;
PhasingScores[i][j]=0;
PhasingScores[i][j]=0.0;
PhasingProbabilities[i][j]=0;
CombinedAlleleFrequencies[i][j]=0.0;
}
//For debugging: get index for specific alleles
if (HLAnames.get(i).equals("HLA_B*0801"))
if (HLAnames.get(i).equals("HLA_A*3101"))
j1 = i;
if (HLAnames.get(i).equals("HLA_B*5601"))
if (HLAnames.get(i).equals("HLA_A*3201"))
k1 = i;
if (HLAnames.get(i).equals("HLA_B*0766"))
if (HLAnames.get(i).equals("HLA_A*3121"))
j2 = i;
if (HLAnames.get(i).equals("HLA_B*0726"))
if (HLAnames.get(i).equals("HLA_A*3207"))
k2 = i;
}
out.printf("DONE! Read %s alleles\n",HLAreads.size());
@ -214,8 +222,8 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
//increment placeholders without adding inserted bases to sequence (effectively removes insertion).
cigarPlaceholder = i+1;
readPlaceholder = readPlaceholder + subreadLength;
} else if (c == 'H'){
//(Headers removed here)***
} else if (c == 'H' || c == 'S'){
//(H = Headers or S = Soft clipped removed here)***
//If reaches H for insertion, get number before 'H' and skip that many characters in sequence
count = cigar.substring(cigarPlaceholder, i);
@ -294,14 +302,15 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
}
//Debugging purposes
if (DEBUG) {out.printf("A[%s]\tC[%s]\tT[%s]\tg[%s]\t",numAs,numCs,numTs,numGs);}
if (DEBUG) {out.printf("A[%s]C[%s]T[%s]G[%s]\t",numAs,numCs,numTs,numGs);}
if (depth > 0){
//Store confidence scores - this is a local hash that we use to get likelihood given a particular genotype
Scores = new Hashtable();
Double likelihood = 0.0;
Double likelihood = 0.0; double maxlikelihood = 0.0;
for ( DiploidGenotype g : DiploidGenotype.values() ) {
likelihood = G.getLikelihood(g);
if (maxlikelihood == 0.0 || likelihood > maxlikelihood){maxlikelihood = likelihood;}
Scores.put(g.toString(), likelihood);
//also hash other combination not stored by DiploidGenotype
if (g.toString().equals("AC")) {
@ -326,8 +335,11 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
//Add SNP if it is a SNP and hasn't been added before
for ( DiploidGenotype g : DiploidGenotype.values() ) {
likelihood = G.getLikelihood(g);
if (likelihood > homreflikelihood && !SNPs.containsKey(Long.toString(loc))){
SNPcount++; SNPs.put(Long.toString(loc),SNPcount); SNPlocations.add(Integer.valueOf(Long.toString(loc)));
if ((likelihood > homreflikelihood) && (likelihood == maxlikelihood) && (!SNPs.containsKey(Long.toString(loc)))){
SNPcount++;
SNPs.put(Long.toString(loc),SNPcount);
SNPlocations.add(Integer.valueOf(Long.toString(loc)));
SNPchars.add(g.toString());
}
}
@ -369,30 +381,30 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
if (loc >= HLAstartpos[k] && loc <= HLAstoppos[k]){
pos_k = loc - HLAstartpos[k];
c2 = Character.toString(Character.toUpperCase(HLAreads.get(k).charAt((int) pos_k)));
//updates likelihoods for both permutations of the alleles, normalized to the likelihood for homozygous reference
if (!homref.equals(c1+c2)){
if (Scores.containsKey(c1 + c2)){
//out.printf("j[%s],k[%s],g[%s],s[%.0f]\t",j,k,c1+c2,Scores.get(c1 + c2));
if (Scores.containsKey(c1 + c2)){
//out.printf("j[%s],k[%s],g[%s],s[%.0f]\t",j,k,c1+c2,Scores.get(c1 + c2));
if (!homref.equals(c1+c2) || Double.parseDouble((String) Scores.get(homref).toString()) != 0){
likelihood = Double.parseDouble((String) Scores.get(c1 + c2).toString());
LikelihoodScores[j][k] = LikelihoodScores[j][k] + likelihood;
LOD[j][k]= LOD[j][k] + likelihood - homreflikelihood;
} else{
if (DEBUG){
//out.printf("\nCharacters [%s] not found,j[%s],k[%s],%s,%s\n",c1+c2,j,k,HLAnames.get(j),HLAnames.get(k));
}
}
} else{
if (DEBUG){
//out.printf("\nCharacters [%s] not found,j[%s],k[%s],%s,%s\n",c1+c2,j,k,HLAnames.get(j),HLAnames.get(k));
}
}
}
}
}
}
if ( DEBUG ){
//Debugging: print updated likelihoods for 2 sets of HLA alleles, as well as normalized likelihoods for all 10 genotypes
out.printf("%s%s:%5.0f\t%s%s:%5.0f\t",r1,r2,LOD[j1][k1],s1,s2,LOD[j2][k2]);
out.printf("LOD[%s%s]:%5.1fL:%5.1fLOD[%s%s]:%5.1fL:%5.1f\t",r1,r2,LOD[j1][k1],LikelihoodScores[j1][k1],s1,s2,LOD[j2][k2],LikelihoodScores[j2][k2]);
for ( DiploidGenotype g : DiploidGenotype.values() ) {
out.printf("%s %5.0f\t",g.toString(),Scores.get(g.toString()));
out.printf("%s %5.1f\t",g.toString(),Scores.get(g.toString()));
}
out.printf("\n");
}
@ -401,45 +413,6 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
return context.getReads().size();
}
private int GetPhaseScore(int alleleindex){
int score = 0;
ArrayList<Integer> SNPsInRead = new ArrayList<Integer>();
ArrayList<Integer> readindex = new ArrayList<Integer>();
Hashtable indexer = new Hashtable();
indexer.put('A', (Integer) 0);
indexer.put('C', (Integer) 1);
indexer.put('G', (Integer) 2);
indexer.put('T', (Integer) 3);
indexer.put('D', (Integer) 4); // D for deletion
//Get HLA allele sequence and position given index
String allele = HLAreads.get(alleleindex);
int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex];
//Finds SNPs in allele
for (int i = allelestart; i <= allelestop; i++){
if (SNPs.containsKey(String.valueOf(i))){
//Stores matrix index
SNPsInRead.add((Integer) SNPs.get(String.valueOf(i)));
//stores position along read
readindex.add((Integer) i - allelestart);
}
}
//sum score for every pair of SNPs in the allele
for (int i = 0; i < SNPsInRead.size(); i++){
for (int j = i+1; j < SNPsInRead.size(); j ++){
char c1 = allele.charAt((int) readindex.get(i));
char c2 = allele.charAt((int) readindex.get(j));
if (indexer.get(c1) != null && indexer.get(c2) != null){
int a = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(c1);
int b = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(c2);
score += SNPcorrelation[a][b];
}
}
}
return score;
}
private void UpdateCorrelation(SAMRecord read, boolean PRINT){
//Updates correlation table with SNPs from specific read (for phasing)
@ -491,7 +464,138 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
}
}
}
private int GetPhaseScore(int alleleindex){
int score = 0;
ArrayList<Integer> SNPsInRead = new ArrayList<Integer>();
ArrayList<Integer> readindex = new ArrayList<Integer>();
Hashtable indexer = new Hashtable();
indexer.put('A', (Integer) 0);
indexer.put('C', (Integer) 1);
indexer.put('G', (Integer) 2);
indexer.put('T', (Integer) 3);
indexer.put('D', (Integer) 4); // D for deletion
//Get HLA allele sequence and position given index
String allele = HLAreads.get(alleleindex);
int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex];
//Finds SNPs in allele
for (int i = allelestart; i <= allelestop; i++){
if (SNPs.containsKey(String.valueOf(i))){
//Stores matrix index
SNPsInRead.add((Integer) SNPs.get(String.valueOf(i)));
//stores position along read
readindex.add((Integer) i - allelestart);
}
}
//sum score for every pair of SNPs in the allele
for (int i = 0; i < SNPsInRead.size(); i++){
for (int j = i+1; j < SNPsInRead.size(); j ++){
char c1 = allele.charAt((int) readindex.get(i));
char c2 = allele.charAt((int) readindex.get(j));
if (indexer.get(c1) != null && indexer.get(c2) != null){
int a = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(c1);
int b = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(c2);
score += SNPcorrelation[a][b];
}
}
}
return score;
}
private double GetPhaseProbability(int alleleindex){
double prob = 1;
ArrayList<Integer> SNPsInRead = new ArrayList<Integer>();
ArrayList<Integer> readindex = new ArrayList<Integer>();
ArrayList<String> Genotypes = new ArrayList<String>();
Hashtable indexer = new Hashtable();
indexer.put('A', (Integer) 0);
indexer.put('C', (Integer) 1);
indexer.put('G', (Integer) 2);
indexer.put('T', (Integer) 3);
//indexer.put('D', (Integer) 4); // D for deletion
//Get HLA allele sequence and position given index
String allele = HLAreads.get(alleleindex);
int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex];
//Finds SNPs in allele
for (int i = allelestart; i <= allelestop; i++){
if (SNPs.containsKey(String.valueOf(i))){
//Stores matrix index
SNPsInRead.add((Integer) SNPs.get(String.valueOf(i)));
//stores position along read
readindex.add((Integer) i - allelestart);
//stores genotypes at SNPs
Genotypes.add(SNPchars.get((Integer) SNPs.get(String.valueOf(i))-1));
}
}
char c1, c2, gi1, gi2, gj1, gj2;
int a, b, a1, a2, b1, b2;
double numerator, denominator;
//sum score for every pair of SNPs in the allele
for (int i = 0; i < SNPsInRead.size()-1; i++){
int j = i + 1;
c1 = allele.charAt((int) readindex.get(i));
c2 = allele.charAt((int) readindex.get(j));
gi1 = Genotypes.get(i).toCharArray()[0];
gi2 = Genotypes.get(i).toCharArray()[1];
gj1 = Genotypes.get(j).toCharArray()[0];
gj2 = Genotypes.get(j).toCharArray()[1];
numerator = 0; denominator = 0;
if (indexer.get(c1) != null && indexer.get(c2) != null){
a = (SNPsInRead.get(i)-1)*5;
b = (SNPsInRead.get(j)-1)*5;
for (int k = 0; k < 5; k++){
for (int l = 0; l < 5; l++){
if (DEBUG){}//out.printf("[%s,%s]=%s, sum=%s\n",a+k,b+l,SNPcorrelation[a+k][b+l],denominator);}
denominator = denominator + SNPcorrelation[a+k][b+l];
}
}
if (denominator > 0){
//indicies for genotypes at the 2 SNPs
a1 = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(gi1);
b1 = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(gj1);
a2 = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(gi2);
b2 = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(gj2);
if (gi1 == gi2 && gj1 == gj2){
if (c1 == gi1 && c2 == gj1){
numerator = SNPcorrelation[a1][b1];
}
} else if ((c1 == gi1 && c2 == gj1) || (c1 == gi2 && c2 == gj2)){
numerator = SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2];
} else if((c1 == gi2 && c2 == gj1) || (c1 == gi1 && c2 == gj2)){
numerator = SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2];
} else {
if ((SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2]) > (SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2])){
numerator = denominator - (SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2]);
}else{
numerator = denominator - (SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2]);
}
}
if (numerator == 0){
prob = 0.01;
}else{
prob = prob * numerator/denominator;
}
if (DEBUG){
out.printf("%s: %s,%s\tC1,C2=[%s,%s]\t[%s%s]=%s\t[%s%s]=%s\t[%s%s]=%s\t[%s%s]=%s\t%s/%s=%.3f\tprob=%.3f\n",readindex.get(i)+allelestart,readindex.get(j)+allelestart,alleleindex,c1,c2,gi1,gj1,SNPcorrelation[a1][b1],gi1,gj2,SNPcorrelation[a1][b2],gi2,gj1,SNPcorrelation[a2][b1],gi2,gj2,SNPcorrelation[a2][b2],numerator,denominator,numerator/denominator,prob);
}
}
}
}
return prob;
}
public boolean isReduceByInterval() {
return true;
}
@ -509,13 +613,13 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
ArrayList<Integer> TopAlleles = new ArrayList<Integer>();
double maxA = 0; int i_maxA =0; int j_maxA = 0; int maxAphase = 0; double maxAfreq = 0.0; double maxlikelihoodA = 0.0;
double maxA = 0; int i_maxA =0; int j_maxA = 0; double maxAphase = 0.0; double maxAfreq = 0.0; double maxlikelihoodA = 0.0; double maxProbA = 0.0;
double maxA2 = 0; int i_maxA_2 =0; int j_maxA_2 = 0;
double maxB = 0; int i_maxB =0; int j_maxB = 0; int maxBphase = 0; double maxBfreq = 0.0; double maxlikelihoodB = 0.0;
double maxB = 0; int i_maxB =0; int j_maxB = 0; double maxBphase = 0.0; double maxBfreq = 0.0; double maxlikelihoodB = 0.0; double maxProbB = 0.0;
double maxB2 = 0; int i_maxB_2 =0; int j_maxB_2 = 0;
double maxC = 0; int i_maxC =0; int j_maxC = 0; int maxCphase = 0; double maxCfreq = 0.0; double maxlikelihoodC = 0.0;
double maxC = 0; int i_maxC =0; int j_maxC = 0; double maxCphase = 0.0; double maxCfreq = 0.0; double maxlikelihoodC = 0.0; double maxProbC = 0.0;
double maxC2 = 0; int i_maxC_2 =0; int j_maxC_2 = 0;
@ -549,44 +653,41 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
for (Integer i = 0; i < numHLAlleles; i++){
for (Integer j = i; j < numHLAlleles; j++){
if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1 && maxA > 0){
if (maxA - LOD[i][j] <= 5 && maxA - LOD[i][j] >= 0){
if (maxA - LOD[i][j] <= 5 && maxA >= LOD[i][j]){
inverseMaxProbA = inverseMaxProbA + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA);
if (DEBUG){
out.printf("HLA-A: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodA,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbA);
}
}
if (LOD[i][j] == maxA){
if (!TopAlleles.contains(i)){TopAlleles.add(i);}
if (!TopAlleles.contains(j)){TopAlleles.add(j);}
if (DEBUG){
out.printf("HLA-A: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,LOD[i][j],maxA,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbA);
}
}
} else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1 && maxB > 0){
if (maxB - LOD[i][j] <= 5 && maxB - LOD[i][j] >= 0){
inverseMaxProbB = inverseMaxProbB + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB);
if (DEBUG){
out.printf("HLA-B: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodB,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbB);
}
}
if (LOD[i][j] == maxB){
if (!TopAlleles.contains(i)){TopAlleles.add(i);}
if (!TopAlleles.contains(j)){TopAlleles.add(j);}
if (DEBUG){
out.printf("HLA-B: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodB,LOD[i][j],maxB,LikelihoodScores[i][j]-maxlikelihoodB,inverseMaxProbB);
}
}
} else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1 && maxC > 0){
if (maxC - LOD[i][j] <= 5 && maxC - LOD[i][j] >= 0){
inverseMaxProbC = inverseMaxProbC + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC);
if (DEBUG){
out.printf("HLA-C: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodC,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbC);
}
}
if (LOD[i][j] == maxC){
if (!TopAlleles.contains(i)){TopAlleles.add(i);}
if (!TopAlleles.contains(j)){TopAlleles.add(j);}
if (DEBUG){
out.printf("HLA-C: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodC,LOD[i][j],maxC,LikelihoodScores[i][j]-maxlikelihoodC,inverseMaxProbC);
}
}
}
}
}
out.printf("\nCalculating SNP correlation matrix for %s SNPs\n",SNPcount);
SNPcorrelation = new int[SNPs.size()*5][SNPs.size()*5];
SNPcorrelation = new int[SNPs.size()*5][SNPs.size()*5]; //keep track of counts of each pair of SNPs
SNPcorrelationProb = new double[SNPs.size()][3]; // keep track of probabilities for specific haplotype at 2 SNPs.
//Create correlation matrix and update correlation scores for all reads
@ -633,12 +734,14 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
out.printf("Calculating phase scores for %s top alleles\n",TopAlleles.size());
//Calculate Phase score for each allele
int[] SinglePhaseScores = new int[TopAlleles.size()];
double[] SinglePhaseScores = new double[TopAlleles.size()];
for (int i = 0; i < TopAlleles.size(); i++){
SinglePhaseScores[i] = GetPhaseScore(TopAlleles.get(i));
//SinglePhaseScores[i] = GetPhaseScore(TopAlleles.get(i));
SinglePhaseScores[i] = GetPhaseProbability(TopAlleles.get(i));
if ( DEBUG ){
//Debugging: print list of alleles to be checked for phasing
out.printf("index=%s\t%s\tscore=%s\n",TopAlleles.get(i),HLAnames.get(TopAlleles.get(i)),SinglePhaseScores[i]);
out.printf("index=%s\t%s\tscore=%.3f\n",TopAlleles.get(i),HLAnames.get(TopAlleles.get(i)),SinglePhaseScores[i]);
}
}
@ -647,68 +750,83 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
String alleleA, alleleB;
Double freq1 = 0.0, freq2 = 0.0;
Double ProbSumA = 0.0, ProbSumB = 0.0, ProbSumC = 0.0, likelihoodPrior;
Double PhaseSumA = 0.0, PhaseSumB = 0.0, PhaseSumC = 0.0;
for (Integer i = 0; i < numHLAlleles; i++){
for (Integer j = i; j < numHLAlleles; j++){
if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1){
if (LOD[i][j] == maxA && maxA > 0){
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)];
if ((LOD[i][j] >= maxA - 5) && LOD[i][j] > 0){
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)];
if (PhasingScores[i][j] > maxAphase){maxAphase = PhasingScores[i][j];}
alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;}
alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;}
SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2;
if (CombinedAlleleFrequencies[i][j] > maxAfreq){maxAfreq = CombinedAlleleFrequencies[i][j];}
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA)/inverseMaxProbA;
ProbSumA = ProbSumA + likelihoodPrior*CombinedAlleleFrequencies[i][j];
ProbSumA = ProbSumA + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];
PhaseSumA = PhaseSumA + PhasingScores[i][j];
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbA){maxProbA = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];}
}
} else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1){
if (LOD[i][j] == maxB && maxB > 0){
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)];
if ((LOD[i][j] >= maxB - 5) && LOD[i][j] > 0){
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)];
if (PhasingScores[i][j] > maxBphase){maxBphase = PhasingScores[i][j];}
alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;}
alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;}
SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2;
if (freq1*freq2 > maxBfreq){maxBfreq = freq1*freq2;}
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB)/inverseMaxProbB;
ProbSumB = ProbSumB + likelihoodPrior*CombinedAlleleFrequencies[i][j];
ProbSumB = ProbSumB + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];
PhaseSumB = PhaseSumB + PhasingScores[i][j];
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbB){maxProbB = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];}
}
} else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1){
if (LOD[i][j] == maxC && maxC > 0){
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)];
if ((LOD[i][j] >= maxC - 5)&& LOD[i][j] > 0){
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)];
if (PhasingScores[i][j] > maxCphase){maxCphase = PhasingScores[i][j];}
alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;}
alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;}
SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2;
if (freq1*freq2 > maxCfreq){maxCfreq = freq1*freq2;}
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC)/inverseMaxProbC;
ProbSumC = ProbSumC + likelihoodPrior*CombinedAlleleFrequencies[i][j];
ProbSumC = ProbSumC + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];
PhaseSumC = PhaseSumC + PhasingScores[i][j];
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbC){maxProbC = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];}
if (DEBUG){
out.printf("DEBUG: %s\t%s\tPhaseScore[%.3f]\t%.3f\n",alleleA,alleleB,PhasingScores[i][j],PhaseSumC);
}
}
}
}
}
if (DEBUG){
out.printf("DEBUG: Phase sum for HLA-C: %.3f\n",PhaseSumC);
}
out.print("Printing results ...\n");
//Print allele pairs with highest likelihood score and highest phasing score
for (Integer i = 0; i < numHLAlleles; i++){
for (Integer j = i; j < numHLAlleles; j++){
if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1){
if(LOD[i][j] == maxA && maxA > 0 && PhasingScores[i][j] == maxAphase){
if((LOD[i][j] >= maxA - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxAphase
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA)/inverseMaxProbA;
out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,inverseMaxProbA,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumA);
if (CombinedAlleleFrequencies[i][j] == maxAfreq){out.printf("\tBEST");}
//out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%.3f\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,inverseMaxProbA,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumA);
out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumA,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumA);
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbA){out.printf("\tBEST");}
out.printf("\n");
}
} else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1){
if(LOD[i][j] == maxB && maxB > 0 && PhasingScores[i][j] == maxBphase){
if((LOD[i][j] >= maxB - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxBphase
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB)/inverseMaxProbB;
out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodB,inverseMaxProbB,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumB);
if (CombinedAlleleFrequencies[i][j] == maxBfreq){out.printf("\tBEST");}
out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumB,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumB);
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbB){out.printf("\tBEST");}
out.printf("\n");
}
} else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1){
if(LOD[i][j] == maxC && maxC > 0 && PhasingScores[i][j] == maxCphase){
if((LOD[i][j] >= maxC - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxCphase
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC)/inverseMaxProbC;
out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodC,inverseMaxProbC,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumC);
if (CombinedAlleleFrequencies[i][j] == maxCfreq){out.printf("\tBEST");}
out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumC,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumC);
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbC){out.printf("\tBEST");}
out.printf("\n");
}
}