From 22932042ea76c2e81049178031e0c23b5ea65e58 Mon Sep 17 00:00:00 2001 From: sjia Date: Tue, 22 Sep 2009 18:28:16 +0000 Subject: [PATCH] Combined Scores, bug fixed for printing HLA-C git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1685 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/HLAcaller/CallHLAWalker.java | 334 ++++++++++++------ 1 file changed, 226 insertions(+), 108 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java index 4cbbfe8f9..175333fbf 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java @@ -36,12 +36,14 @@ public class CallHLAWalker extends LocusWalker>{ public boolean suppressPrinting = false; //String HLAdatabaseFile = "/Users/shermanjia/Work/HLA.sam"; - //String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.sam"; + String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.4digitUnique.sam"; //String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.sam"; - String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.4digitUnique.sam"; + //String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.4digitUnique.sam"; //String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/CEU_Founders_HLA.freq"; String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq"; + String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq"; + ArrayList HLAreads = new ArrayList(); @@ -59,7 +61,8 @@ public class CallHLAWalker extends LocusWalker>{ double[] SingleAlleleFrequencies; double[][] LOD; String[][] Alleles; double[][] LikelihoodScores; - int[][] PhasingScores; + double[][] PhasingScores; + double[][]PhasingProbabilities; double[][] CombinedAlleleFrequencies; int j1, k1, j2, k2; int iAstart = -1, iAstop = -1, iBstart = -1, iBstop = -1, iCstart = -1, iCstop = -1; @@ -70,9 +73,12 @@ public class CallHLAWalker extends LocusWalker>{ Hashtable Scores = new Hashtable(); Hashtable SNPs = new Hashtable(); + ArrayList SNPchars = new ArrayList(); ArrayList SNPlocations = new ArrayList(); Integer SNPcount = 0; int[][] SNPcorrelation; + double[][] SNPcorrelationProb; + String[][] SNPhaplotypes; boolean DatabaseLoaded = false; boolean PrintedOutput = false; @@ -116,7 +122,8 @@ public class CallHLAWalker extends LocusWalker>{ SingleAlleleFrequencies = new double[n]; LOD = new double[n][n]; LikelihoodScores = new double[n][n]; - PhasingScores = new int[n][n]; + PhasingScores = new double[n][n]; + PhasingProbabilities = new double[n][n]; CombinedAlleleFrequencies = new double[n][n]; for (i = 0; i < n; i++){ @@ -128,17 +135,18 @@ public class CallHLAWalker extends LocusWalker>{ for (int j = 0; j >{ //increment placeholders without adding inserted bases to sequence (effectively removes insertion). cigarPlaceholder = i+1; readPlaceholder = readPlaceholder + subreadLength; - } else if (c == 'H'){ - //(Headers removed here)*** + } else if (c == 'H' || c == 'S'){ + //(H = Headers or S = Soft clipped removed here)*** //If reaches H for insertion, get number before 'H' and skip that many characters in sequence count = cigar.substring(cigarPlaceholder, i); @@ -294,14 +302,15 @@ public class CallHLAWalker extends LocusWalker>{ } //Debugging purposes - if (DEBUG) {out.printf("A[%s]\tC[%s]\tT[%s]\tg[%s]\t",numAs,numCs,numTs,numGs);} + if (DEBUG) {out.printf("A[%s]C[%s]T[%s]G[%s]\t",numAs,numCs,numTs,numGs);} if (depth > 0){ //Store confidence scores - this is a local hash that we use to get likelihood given a particular genotype Scores = new Hashtable(); - Double likelihood = 0.0; + Double likelihood = 0.0; double maxlikelihood = 0.0; for ( DiploidGenotype g : DiploidGenotype.values() ) { likelihood = G.getLikelihood(g); + if (maxlikelihood == 0.0 || likelihood > maxlikelihood){maxlikelihood = likelihood;} Scores.put(g.toString(), likelihood); //also hash other combination not stored by DiploidGenotype if (g.toString().equals("AC")) { @@ -326,8 +335,11 @@ public class CallHLAWalker extends LocusWalker>{ //Add SNP if it is a SNP and hasn't been added before for ( DiploidGenotype g : DiploidGenotype.values() ) { likelihood = G.getLikelihood(g); - if (likelihood > homreflikelihood && !SNPs.containsKey(Long.toString(loc))){ - SNPcount++; SNPs.put(Long.toString(loc),SNPcount); SNPlocations.add(Integer.valueOf(Long.toString(loc))); + if ((likelihood > homreflikelihood) && (likelihood == maxlikelihood) && (!SNPs.containsKey(Long.toString(loc)))){ + SNPcount++; + SNPs.put(Long.toString(loc),SNPcount); + SNPlocations.add(Integer.valueOf(Long.toString(loc))); + SNPchars.add(g.toString()); } } @@ -369,30 +381,30 @@ public class CallHLAWalker extends LocusWalker>{ if (loc >= HLAstartpos[k] && loc <= HLAstoppos[k]){ pos_k = loc - HLAstartpos[k]; c2 = Character.toString(Character.toUpperCase(HLAreads.get(k).charAt((int) pos_k))); - + //updates likelihoods for both permutations of the alleles, normalized to the likelihood for homozygous reference - if (!homref.equals(c1+c2)){ - if (Scores.containsKey(c1 + c2)){ - //out.printf("j[%s],k[%s],g[%s],s[%.0f]\t",j,k,c1+c2,Scores.get(c1 + c2)); + if (Scores.containsKey(c1 + c2)){ + //out.printf("j[%s],k[%s],g[%s],s[%.0f]\t",j,k,c1+c2,Scores.get(c1 + c2)); + if (!homref.equals(c1+c2) || Double.parseDouble((String) Scores.get(homref).toString()) != 0){ likelihood = Double.parseDouble((String) Scores.get(c1 + c2).toString()); LikelihoodScores[j][k] = LikelihoodScores[j][k] + likelihood; LOD[j][k]= LOD[j][k] + likelihood - homreflikelihood; - - } else{ - if (DEBUG){ - //out.printf("\nCharacters [%s] not found,j[%s],k[%s],%s,%s\n",c1+c2,j,k,HLAnames.get(j),HLAnames.get(k)); - } + } + } else{ + if (DEBUG){ + //out.printf("\nCharacters [%s] not found,j[%s],k[%s],%s,%s\n",c1+c2,j,k,HLAnames.get(j),HLAnames.get(k)); } } + } } } } if ( DEBUG ){ //Debugging: print updated likelihoods for 2 sets of HLA alleles, as well as normalized likelihoods for all 10 genotypes - out.printf("%s%s:%5.0f\t%s%s:%5.0f\t",r1,r2,LOD[j1][k1],s1,s2,LOD[j2][k2]); + out.printf("LOD[%s%s]:%5.1fL:%5.1fLOD[%s%s]:%5.1fL:%5.1f\t",r1,r2,LOD[j1][k1],LikelihoodScores[j1][k1],s1,s2,LOD[j2][k2],LikelihoodScores[j2][k2]); for ( DiploidGenotype g : DiploidGenotype.values() ) { - out.printf("%s %5.0f\t",g.toString(),Scores.get(g.toString())); + out.printf("%s %5.1f\t",g.toString(),Scores.get(g.toString())); } out.printf("\n"); } @@ -401,45 +413,6 @@ public class CallHLAWalker extends LocusWalker>{ return context.getReads().size(); } - private int GetPhaseScore(int alleleindex){ - int score = 0; - ArrayList SNPsInRead = new ArrayList(); - ArrayList readindex = new ArrayList(); - Hashtable indexer = new Hashtable(); - indexer.put('A', (Integer) 0); - indexer.put('C', (Integer) 1); - indexer.put('G', (Integer) 2); - indexer.put('T', (Integer) 3); - indexer.put('D', (Integer) 4); // D for deletion - - //Get HLA allele sequence and position given index - String allele = HLAreads.get(alleleindex); - int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex]; - - //Finds SNPs in allele - for (int i = allelestart; i <= allelestop; i++){ - if (SNPs.containsKey(String.valueOf(i))){ - //Stores matrix index - SNPsInRead.add((Integer) SNPs.get(String.valueOf(i))); - //stores position along read - readindex.add((Integer) i - allelestart); - } - } - - //sum score for every pair of SNPs in the allele - for (int i = 0; i < SNPsInRead.size(); i++){ - for (int j = i+1; j < SNPsInRead.size(); j ++){ - char c1 = allele.charAt((int) readindex.get(i)); - char c2 = allele.charAt((int) readindex.get(j)); - if (indexer.get(c1) != null && indexer.get(c2) != null){ - int a = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(c1); - int b = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(c2); - score += SNPcorrelation[a][b]; - } - } - } - return score; - } private void UpdateCorrelation(SAMRecord read, boolean PRINT){ //Updates correlation table with SNPs from specific read (for phasing) @@ -491,7 +464,138 @@ public class CallHLAWalker extends LocusWalker>{ } } } - + + + private int GetPhaseScore(int alleleindex){ + int score = 0; + ArrayList SNPsInRead = new ArrayList(); + ArrayList readindex = new ArrayList(); + Hashtable indexer = new Hashtable(); + indexer.put('A', (Integer) 0); + indexer.put('C', (Integer) 1); + indexer.put('G', (Integer) 2); + indexer.put('T', (Integer) 3); + indexer.put('D', (Integer) 4); // D for deletion + + //Get HLA allele sequence and position given index + String allele = HLAreads.get(alleleindex); + int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex]; + + //Finds SNPs in allele + for (int i = allelestart; i <= allelestop; i++){ + if (SNPs.containsKey(String.valueOf(i))){ + //Stores matrix index + SNPsInRead.add((Integer) SNPs.get(String.valueOf(i))); + //stores position along read + readindex.add((Integer) i - allelestart); + } + } + + //sum score for every pair of SNPs in the allele + for (int i = 0; i < SNPsInRead.size(); i++){ + for (int j = i+1; j < SNPsInRead.size(); j ++){ + char c1 = allele.charAt((int) readindex.get(i)); + char c2 = allele.charAt((int) readindex.get(j)); + if (indexer.get(c1) != null && indexer.get(c2) != null){ + int a = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(c1); + int b = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(c2); + score += SNPcorrelation[a][b]; + } + } + } + return score; + } + + private double GetPhaseProbability(int alleleindex){ + double prob = 1; + ArrayList SNPsInRead = new ArrayList(); + ArrayList readindex = new ArrayList(); + ArrayList Genotypes = new ArrayList(); + Hashtable indexer = new Hashtable(); + indexer.put('A', (Integer) 0); + indexer.put('C', (Integer) 1); + indexer.put('G', (Integer) 2); + indexer.put('T', (Integer) 3); + //indexer.put('D', (Integer) 4); // D for deletion + + //Get HLA allele sequence and position given index + String allele = HLAreads.get(alleleindex); + int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex]; + + //Finds SNPs in allele + for (int i = allelestart; i <= allelestop; i++){ + if (SNPs.containsKey(String.valueOf(i))){ + //Stores matrix index + SNPsInRead.add((Integer) SNPs.get(String.valueOf(i))); + //stores position along read + readindex.add((Integer) i - allelestart); + //stores genotypes at SNPs + Genotypes.add(SNPchars.get((Integer) SNPs.get(String.valueOf(i))-1)); + } + } + + char c1, c2, gi1, gi2, gj1, gj2; + int a, b, a1, a2, b1, b2; + double numerator, denominator; + //sum score for every pair of SNPs in the allele + for (int i = 0; i < SNPsInRead.size()-1; i++){ + int j = i + 1; + c1 = allele.charAt((int) readindex.get(i)); + c2 = allele.charAt((int) readindex.get(j)); + + gi1 = Genotypes.get(i).toCharArray()[0]; + gi2 = Genotypes.get(i).toCharArray()[1]; + gj1 = Genotypes.get(j).toCharArray()[0]; + gj2 = Genotypes.get(j).toCharArray()[1]; + + numerator = 0; denominator = 0; + if (indexer.get(c1) != null && indexer.get(c2) != null){ + a = (SNPsInRead.get(i)-1)*5; + b = (SNPsInRead.get(j)-1)*5; + for (int k = 0; k < 5; k++){ + for (int l = 0; l < 5; l++){ + if (DEBUG){}//out.printf("[%s,%s]=%s, sum=%s\n",a+k,b+l,SNPcorrelation[a+k][b+l],denominator);} + denominator = denominator + SNPcorrelation[a+k][b+l]; + } + } + if (denominator > 0){ + //indicies for genotypes at the 2 SNPs + a1 = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(gi1); + b1 = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(gj1); + a2 = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(gi2); + b2 = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(gj2); + + if (gi1 == gi2 && gj1 == gj2){ + if (c1 == gi1 && c2 == gj1){ + numerator = SNPcorrelation[a1][b1]; + } + } else if ((c1 == gi1 && c2 == gj1) || (c1 == gi2 && c2 == gj2)){ + numerator = SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2]; + } else if((c1 == gi2 && c2 == gj1) || (c1 == gi1 && c2 == gj2)){ + numerator = SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2]; + } else { + if ((SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2]) > (SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2])){ + numerator = denominator - (SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2]); + }else{ + numerator = denominator - (SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2]); + } + } + + if (numerator == 0){ + prob = 0.01; + }else{ + prob = prob * numerator/denominator; + } + + if (DEBUG){ + out.printf("%s: %s,%s\tC1,C2=[%s,%s]\t[%s%s]=%s\t[%s%s]=%s\t[%s%s]=%s\t[%s%s]=%s\t%s/%s=%.3f\tprob=%.3f\n",readindex.get(i)+allelestart,readindex.get(j)+allelestart,alleleindex,c1,c2,gi1,gj1,SNPcorrelation[a1][b1],gi1,gj2,SNPcorrelation[a1][b2],gi2,gj1,SNPcorrelation[a2][b1],gi2,gj2,SNPcorrelation[a2][b2],numerator,denominator,numerator/denominator,prob); + } + } + } + } + return prob; + } + public boolean isReduceByInterval() { return true; } @@ -509,13 +613,13 @@ public class CallHLAWalker extends LocusWalker>{ ArrayList TopAlleles = new ArrayList(); - double maxA = 0; int i_maxA =0; int j_maxA = 0; int maxAphase = 0; double maxAfreq = 0.0; double maxlikelihoodA = 0.0; + double maxA = 0; int i_maxA =0; int j_maxA = 0; double maxAphase = 0.0; double maxAfreq = 0.0; double maxlikelihoodA = 0.0; double maxProbA = 0.0; double maxA2 = 0; int i_maxA_2 =0; int j_maxA_2 = 0; - double maxB = 0; int i_maxB =0; int j_maxB = 0; int maxBphase = 0; double maxBfreq = 0.0; double maxlikelihoodB = 0.0; + double maxB = 0; int i_maxB =0; int j_maxB = 0; double maxBphase = 0.0; double maxBfreq = 0.0; double maxlikelihoodB = 0.0; double maxProbB = 0.0; double maxB2 = 0; int i_maxB_2 =0; int j_maxB_2 = 0; - double maxC = 0; int i_maxC =0; int j_maxC = 0; int maxCphase = 0; double maxCfreq = 0.0; double maxlikelihoodC = 0.0; + double maxC = 0; int i_maxC =0; int j_maxC = 0; double maxCphase = 0.0; double maxCfreq = 0.0; double maxlikelihoodC = 0.0; double maxProbC = 0.0; double maxC2 = 0; int i_maxC_2 =0; int j_maxC_2 = 0; @@ -549,44 +653,41 @@ public class CallHLAWalker extends LocusWalker>{ for (Integer i = 0; i < numHLAlleles; i++){ for (Integer j = i; j < numHLAlleles; j++){ if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1 && maxA > 0){ - if (maxA - LOD[i][j] <= 5 && maxA - LOD[i][j] >= 0){ + if (maxA - LOD[i][j] <= 5 && maxA >= LOD[i][j]){ inverseMaxProbA = inverseMaxProbA + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA); - if (DEBUG){ - out.printf("HLA-A: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodA,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbA); - } - } - if (LOD[i][j] == maxA){ if (!TopAlleles.contains(i)){TopAlleles.add(i);} if (!TopAlleles.contains(j)){TopAlleles.add(j);} + if (DEBUG){ + out.printf("HLA-A: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,LOD[i][j],maxA,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbA); + } } } else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1 && maxB > 0){ if (maxB - LOD[i][j] <= 5 && maxB - LOD[i][j] >= 0){ inverseMaxProbB = inverseMaxProbB + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB); - if (DEBUG){ - out.printf("HLA-B: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodB,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbB); - } - } - if (LOD[i][j] == maxB){ if (!TopAlleles.contains(i)){TopAlleles.add(i);} if (!TopAlleles.contains(j)){TopAlleles.add(j);} + if (DEBUG){ + out.printf("HLA-B: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodB,LOD[i][j],maxB,LikelihoodScores[i][j]-maxlikelihoodB,inverseMaxProbB); + } } } else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1 && maxC > 0){ if (maxC - LOD[i][j] <= 5 && maxC - LOD[i][j] >= 0){ inverseMaxProbC = inverseMaxProbC + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC); - if (DEBUG){ - out.printf("HLA-C: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodC,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbC); - } - } - if (LOD[i][j] == maxC){ if (!TopAlleles.contains(i)){TopAlleles.add(i);} if (!TopAlleles.contains(j)){TopAlleles.add(j);} + if (DEBUG){ + out.printf("HLA-C: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodC,LOD[i][j],maxC,LikelihoodScores[i][j]-maxlikelihoodC,inverseMaxProbC); + } } + } } } out.printf("\nCalculating SNP correlation matrix for %s SNPs\n",SNPcount); - SNPcorrelation = new int[SNPs.size()*5][SNPs.size()*5]; + SNPcorrelation = new int[SNPs.size()*5][SNPs.size()*5]; //keep track of counts of each pair of SNPs + SNPcorrelationProb = new double[SNPs.size()][3]; // keep track of probabilities for specific haplotype at 2 SNPs. + //Create correlation matrix and update correlation scores for all reads @@ -633,12 +734,14 @@ public class CallHLAWalker extends LocusWalker>{ out.printf("Calculating phase scores for %s top alleles\n",TopAlleles.size()); //Calculate Phase score for each allele - int[] SinglePhaseScores = new int[TopAlleles.size()]; + double[] SinglePhaseScores = new double[TopAlleles.size()]; for (int i = 0; i < TopAlleles.size(); i++){ - SinglePhaseScores[i] = GetPhaseScore(TopAlleles.get(i)); + //SinglePhaseScores[i] = GetPhaseScore(TopAlleles.get(i)); + SinglePhaseScores[i] = GetPhaseProbability(TopAlleles.get(i)); + if ( DEBUG ){ //Debugging: print list of alleles to be checked for phasing - out.printf("index=%s\t%s\tscore=%s\n",TopAlleles.get(i),HLAnames.get(TopAlleles.get(i)),SinglePhaseScores[i]); + out.printf("index=%s\t%s\tscore=%.3f\n",TopAlleles.get(i),HLAnames.get(TopAlleles.get(i)),SinglePhaseScores[i]); } } @@ -647,68 +750,83 @@ public class CallHLAWalker extends LocusWalker>{ String alleleA, alleleB; Double freq1 = 0.0, freq2 = 0.0; Double ProbSumA = 0.0, ProbSumB = 0.0, ProbSumC = 0.0, likelihoodPrior; + Double PhaseSumA = 0.0, PhaseSumB = 0.0, PhaseSumC = 0.0; for (Integer i = 0; i < numHLAlleles; i++){ for (Integer j = i; j < numHLAlleles; j++){ if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1){ - if (LOD[i][j] == maxA && maxA > 0){ - PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)]; + if ((LOD[i][j] >= maxA - 5) && LOD[i][j] > 0){ + PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)]; if (PhasingScores[i][j] > maxAphase){maxAphase = PhasingScores[i][j];} alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;} alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;} SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2; if (CombinedAlleleFrequencies[i][j] > maxAfreq){maxAfreq = CombinedAlleleFrequencies[i][j];} likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA)/inverseMaxProbA; - ProbSumA = ProbSumA + likelihoodPrior*CombinedAlleleFrequencies[i][j]; + ProbSumA = ProbSumA + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]; + PhaseSumA = PhaseSumA + PhasingScores[i][j]; + if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbA){maxProbA = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];} } } else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1){ - if (LOD[i][j] == maxB && maxB > 0){ - PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)]; + if ((LOD[i][j] >= maxB - 5) && LOD[i][j] > 0){ + PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)]; if (PhasingScores[i][j] > maxBphase){maxBphase = PhasingScores[i][j];} alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;} alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;} SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2; if (freq1*freq2 > maxBfreq){maxBfreq = freq1*freq2;} likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB)/inverseMaxProbB; - ProbSumB = ProbSumB + likelihoodPrior*CombinedAlleleFrequencies[i][j]; + ProbSumB = ProbSumB + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]; + PhaseSumB = PhaseSumB + PhasingScores[i][j]; + if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbB){maxProbB = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];} } } else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1){ - if (LOD[i][j] == maxC && maxC > 0){ - PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)]; + if ((LOD[i][j] >= maxC - 5)&& LOD[i][j] > 0){ + + PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)]; if (PhasingScores[i][j] > maxCphase){maxCphase = PhasingScores[i][j];} alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;} alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;} SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2; if (freq1*freq2 > maxCfreq){maxCfreq = freq1*freq2;} likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC)/inverseMaxProbC; - ProbSumC = ProbSumC + likelihoodPrior*CombinedAlleleFrequencies[i][j]; + ProbSumC = ProbSumC + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]; + PhaseSumC = PhaseSumC + PhasingScores[i][j]; + if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbC){maxProbC = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];} + if (DEBUG){ + out.printf("DEBUG: %s\t%s\tPhaseScore[%.3f]\t%.3f\n",alleleA,alleleB,PhasingScores[i][j],PhaseSumC); + } } } } } + if (DEBUG){ + out.printf("DEBUG: Phase sum for HLA-C: %.3f\n",PhaseSumC); + } + out.print("Printing results ...\n"); //Print allele pairs with highest likelihood score and highest phasing score - for (Integer i = 0; i < numHLAlleles; i++){ for (Integer j = i; j < numHLAlleles; j++){ if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1){ - if(LOD[i][j] == maxA && maxA > 0 && PhasingScores[i][j] == maxAphase){ + if((LOD[i][j] >= maxA - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxAphase likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA)/inverseMaxProbA; - out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,inverseMaxProbA,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumA); - if (CombinedAlleleFrequencies[i][j] == maxAfreq){out.printf("\tBEST");} + //out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%.3f\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,inverseMaxProbA,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumA); + out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumA,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumA); + if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbA){out.printf("\tBEST");} out.printf("\n"); } } else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1){ - if(LOD[i][j] == maxB && maxB > 0 && PhasingScores[i][j] == maxBphase){ + if((LOD[i][j] >= maxB - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxBphase likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB)/inverseMaxProbB; - out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodB,inverseMaxProbB,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumB); - if (CombinedAlleleFrequencies[i][j] == maxBfreq){out.printf("\tBEST");} + out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumB,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumB); + if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbB){out.printf("\tBEST");} out.printf("\n"); } } else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1){ - if(LOD[i][j] == maxC && maxC > 0 && PhasingScores[i][j] == maxCphase){ + if((LOD[i][j] >= maxC - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxCphase likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC)/inverseMaxProbC; - out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodC,inverseMaxProbC,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumC); - if (CombinedAlleleFrequencies[i][j] == maxCfreq){out.printf("\tBEST");} + out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumC,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumC); + if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbC){out.printf("\tBEST");} out.printf("\n"); } }