Combined Scores, bug fixed for printing HLA-C
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1685 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
682b765536
commit
22932042ea
|
|
@ -36,12 +36,14 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
public boolean suppressPrinting = false;
|
||||
|
||||
//String HLAdatabaseFile = "/Users/shermanjia/Work/HLA.sam";
|
||||
//String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.sam";
|
||||
String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.4digitUnique.sam";
|
||||
//String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.sam";
|
||||
String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.4digitUnique.sam";
|
||||
//String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.4digitUnique.sam";
|
||||
//String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/CEU_Founders_HLA.freq";
|
||||
|
||||
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
|
||||
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
|
||||
|
||||
|
||||
|
||||
ArrayList<String> HLAreads = new ArrayList<String>();
|
||||
|
|
@ -59,7 +61,8 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
double[] SingleAlleleFrequencies;
|
||||
double[][] LOD; String[][] Alleles;
|
||||
double[][] LikelihoodScores;
|
||||
int[][] PhasingScores;
|
||||
double[][] PhasingScores;
|
||||
double[][]PhasingProbabilities;
|
||||
double[][] CombinedAlleleFrequencies;
|
||||
int j1, k1, j2, k2;
|
||||
int iAstart = -1, iAstop = -1, iBstart = -1, iBstop = -1, iCstart = -1, iCstop = -1;
|
||||
|
|
@ -70,9 +73,12 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
|
||||
Hashtable Scores = new Hashtable();
|
||||
Hashtable SNPs = new Hashtable();
|
||||
ArrayList<String> SNPchars = new ArrayList<String>();
|
||||
ArrayList<Integer> SNPlocations = new ArrayList<Integer>();
|
||||
Integer SNPcount = 0;
|
||||
int[][] SNPcorrelation;
|
||||
double[][] SNPcorrelationProb;
|
||||
String[][] SNPhaplotypes;
|
||||
|
||||
boolean DatabaseLoaded = false;
|
||||
boolean PrintedOutput = false;
|
||||
|
|
@ -116,7 +122,8 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
SingleAlleleFrequencies = new double[n];
|
||||
LOD = new double[n][n];
|
||||
LikelihoodScores = new double[n][n];
|
||||
PhasingScores = new int[n][n];
|
||||
PhasingScores = new double[n][n];
|
||||
PhasingProbabilities = new double[n][n];
|
||||
CombinedAlleleFrequencies = new double[n][n];
|
||||
|
||||
for (i = 0; i < n; i++){
|
||||
|
|
@ -128,17 +135,18 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
for (int j = 0; j <n; j++){
|
||||
LOD[i][j]=0;
|
||||
LikelihoodScores[i][j]=0;
|
||||
PhasingScores[i][j]=0;
|
||||
PhasingScores[i][j]=0.0;
|
||||
PhasingProbabilities[i][j]=0;
|
||||
CombinedAlleleFrequencies[i][j]=0.0;
|
||||
}
|
||||
//For debugging: get index for specific alleles
|
||||
if (HLAnames.get(i).equals("HLA_B*0801"))
|
||||
if (HLAnames.get(i).equals("HLA_A*3101"))
|
||||
j1 = i;
|
||||
if (HLAnames.get(i).equals("HLA_B*5601"))
|
||||
if (HLAnames.get(i).equals("HLA_A*3201"))
|
||||
k1 = i;
|
||||
if (HLAnames.get(i).equals("HLA_B*0766"))
|
||||
if (HLAnames.get(i).equals("HLA_A*3121"))
|
||||
j2 = i;
|
||||
if (HLAnames.get(i).equals("HLA_B*0726"))
|
||||
if (HLAnames.get(i).equals("HLA_A*3207"))
|
||||
k2 = i;
|
||||
}
|
||||
out.printf("DONE! Read %s alleles\n",HLAreads.size());
|
||||
|
|
@ -214,8 +222,8 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
//increment placeholders without adding inserted bases to sequence (effectively removes insertion).
|
||||
cigarPlaceholder = i+1;
|
||||
readPlaceholder = readPlaceholder + subreadLength;
|
||||
} else if (c == 'H'){
|
||||
//(Headers removed here)***
|
||||
} else if (c == 'H' || c == 'S'){
|
||||
//(H = Headers or S = Soft clipped removed here)***
|
||||
|
||||
//If reaches H for insertion, get number before 'H' and skip that many characters in sequence
|
||||
count = cigar.substring(cigarPlaceholder, i);
|
||||
|
|
@ -294,14 +302,15 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
}
|
||||
|
||||
//Debugging purposes
|
||||
if (DEBUG) {out.printf("A[%s]\tC[%s]\tT[%s]\tg[%s]\t",numAs,numCs,numTs,numGs);}
|
||||
if (DEBUG) {out.printf("A[%s]C[%s]T[%s]G[%s]\t",numAs,numCs,numTs,numGs);}
|
||||
|
||||
if (depth > 0){
|
||||
//Store confidence scores - this is a local hash that we use to get likelihood given a particular genotype
|
||||
Scores = new Hashtable();
|
||||
Double likelihood = 0.0;
|
||||
Double likelihood = 0.0; double maxlikelihood = 0.0;
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
likelihood = G.getLikelihood(g);
|
||||
if (maxlikelihood == 0.0 || likelihood > maxlikelihood){maxlikelihood = likelihood;}
|
||||
Scores.put(g.toString(), likelihood);
|
||||
//also hash other combination not stored by DiploidGenotype
|
||||
if (g.toString().equals("AC")) {
|
||||
|
|
@ -326,8 +335,11 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
//Add SNP if it is a SNP and hasn't been added before
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
likelihood = G.getLikelihood(g);
|
||||
if (likelihood > homreflikelihood && !SNPs.containsKey(Long.toString(loc))){
|
||||
SNPcount++; SNPs.put(Long.toString(loc),SNPcount); SNPlocations.add(Integer.valueOf(Long.toString(loc)));
|
||||
if ((likelihood > homreflikelihood) && (likelihood == maxlikelihood) && (!SNPs.containsKey(Long.toString(loc)))){
|
||||
SNPcount++;
|
||||
SNPs.put(Long.toString(loc),SNPcount);
|
||||
SNPlocations.add(Integer.valueOf(Long.toString(loc)));
|
||||
SNPchars.add(g.toString());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -369,30 +381,30 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
if (loc >= HLAstartpos[k] && loc <= HLAstoppos[k]){
|
||||
pos_k = loc - HLAstartpos[k];
|
||||
c2 = Character.toString(Character.toUpperCase(HLAreads.get(k).charAt((int) pos_k)));
|
||||
|
||||
|
||||
//updates likelihoods for both permutations of the alleles, normalized to the likelihood for homozygous reference
|
||||
if (!homref.equals(c1+c2)){
|
||||
if (Scores.containsKey(c1 + c2)){
|
||||
//out.printf("j[%s],k[%s],g[%s],s[%.0f]\t",j,k,c1+c2,Scores.get(c1 + c2));
|
||||
if (Scores.containsKey(c1 + c2)){
|
||||
//out.printf("j[%s],k[%s],g[%s],s[%.0f]\t",j,k,c1+c2,Scores.get(c1 + c2));
|
||||
if (!homref.equals(c1+c2) || Double.parseDouble((String) Scores.get(homref).toString()) != 0){
|
||||
likelihood = Double.parseDouble((String) Scores.get(c1 + c2).toString());
|
||||
LikelihoodScores[j][k] = LikelihoodScores[j][k] + likelihood;
|
||||
LOD[j][k]= LOD[j][k] + likelihood - homreflikelihood;
|
||||
|
||||
} else{
|
||||
if (DEBUG){
|
||||
//out.printf("\nCharacters [%s] not found,j[%s],k[%s],%s,%s\n",c1+c2,j,k,HLAnames.get(j),HLAnames.get(k));
|
||||
}
|
||||
}
|
||||
} else{
|
||||
if (DEBUG){
|
||||
//out.printf("\nCharacters [%s] not found,j[%s],k[%s],%s,%s\n",c1+c2,j,k,HLAnames.get(j),HLAnames.get(k));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( DEBUG ){
|
||||
//Debugging: print updated likelihoods for 2 sets of HLA alleles, as well as normalized likelihoods for all 10 genotypes
|
||||
out.printf("%s%s:%5.0f\t%s%s:%5.0f\t",r1,r2,LOD[j1][k1],s1,s2,LOD[j2][k2]);
|
||||
out.printf("LOD[%s%s]:%5.1fL:%5.1fLOD[%s%s]:%5.1fL:%5.1f\t",r1,r2,LOD[j1][k1],LikelihoodScores[j1][k1],s1,s2,LOD[j2][k2],LikelihoodScores[j2][k2]);
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
out.printf("%s %5.0f\t",g.toString(),Scores.get(g.toString()));
|
||||
out.printf("%s %5.1f\t",g.toString(),Scores.get(g.toString()));
|
||||
}
|
||||
out.printf("\n");
|
||||
}
|
||||
|
|
@ -401,45 +413,6 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
return context.getReads().size();
|
||||
}
|
||||
|
||||
private int GetPhaseScore(int alleleindex){
|
||||
int score = 0;
|
||||
ArrayList<Integer> SNPsInRead = new ArrayList<Integer>();
|
||||
ArrayList<Integer> readindex = new ArrayList<Integer>();
|
||||
Hashtable indexer = new Hashtable();
|
||||
indexer.put('A', (Integer) 0);
|
||||
indexer.put('C', (Integer) 1);
|
||||
indexer.put('G', (Integer) 2);
|
||||
indexer.put('T', (Integer) 3);
|
||||
indexer.put('D', (Integer) 4); // D for deletion
|
||||
|
||||
//Get HLA allele sequence and position given index
|
||||
String allele = HLAreads.get(alleleindex);
|
||||
int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex];
|
||||
|
||||
//Finds SNPs in allele
|
||||
for (int i = allelestart; i <= allelestop; i++){
|
||||
if (SNPs.containsKey(String.valueOf(i))){
|
||||
//Stores matrix index
|
||||
SNPsInRead.add((Integer) SNPs.get(String.valueOf(i)));
|
||||
//stores position along read
|
||||
readindex.add((Integer) i - allelestart);
|
||||
}
|
||||
}
|
||||
|
||||
//sum score for every pair of SNPs in the allele
|
||||
for (int i = 0; i < SNPsInRead.size(); i++){
|
||||
for (int j = i+1; j < SNPsInRead.size(); j ++){
|
||||
char c1 = allele.charAt((int) readindex.get(i));
|
||||
char c2 = allele.charAt((int) readindex.get(j));
|
||||
if (indexer.get(c1) != null && indexer.get(c2) != null){
|
||||
int a = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(c1);
|
||||
int b = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(c2);
|
||||
score += SNPcorrelation[a][b];
|
||||
}
|
||||
}
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
private void UpdateCorrelation(SAMRecord read, boolean PRINT){
|
||||
//Updates correlation table with SNPs from specific read (for phasing)
|
||||
|
|
@ -491,7 +464,138 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private int GetPhaseScore(int alleleindex){
|
||||
int score = 0;
|
||||
ArrayList<Integer> SNPsInRead = new ArrayList<Integer>();
|
||||
ArrayList<Integer> readindex = new ArrayList<Integer>();
|
||||
Hashtable indexer = new Hashtable();
|
||||
indexer.put('A', (Integer) 0);
|
||||
indexer.put('C', (Integer) 1);
|
||||
indexer.put('G', (Integer) 2);
|
||||
indexer.put('T', (Integer) 3);
|
||||
indexer.put('D', (Integer) 4); // D for deletion
|
||||
|
||||
//Get HLA allele sequence and position given index
|
||||
String allele = HLAreads.get(alleleindex);
|
||||
int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex];
|
||||
|
||||
//Finds SNPs in allele
|
||||
for (int i = allelestart; i <= allelestop; i++){
|
||||
if (SNPs.containsKey(String.valueOf(i))){
|
||||
//Stores matrix index
|
||||
SNPsInRead.add((Integer) SNPs.get(String.valueOf(i)));
|
||||
//stores position along read
|
||||
readindex.add((Integer) i - allelestart);
|
||||
}
|
||||
}
|
||||
|
||||
//sum score for every pair of SNPs in the allele
|
||||
for (int i = 0; i < SNPsInRead.size(); i++){
|
||||
for (int j = i+1; j < SNPsInRead.size(); j ++){
|
||||
char c1 = allele.charAt((int) readindex.get(i));
|
||||
char c2 = allele.charAt((int) readindex.get(j));
|
||||
if (indexer.get(c1) != null && indexer.get(c2) != null){
|
||||
int a = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(c1);
|
||||
int b = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(c2);
|
||||
score += SNPcorrelation[a][b];
|
||||
}
|
||||
}
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
private double GetPhaseProbability(int alleleindex){
|
||||
double prob = 1;
|
||||
ArrayList<Integer> SNPsInRead = new ArrayList<Integer>();
|
||||
ArrayList<Integer> readindex = new ArrayList<Integer>();
|
||||
ArrayList<String> Genotypes = new ArrayList<String>();
|
||||
Hashtable indexer = new Hashtable();
|
||||
indexer.put('A', (Integer) 0);
|
||||
indexer.put('C', (Integer) 1);
|
||||
indexer.put('G', (Integer) 2);
|
||||
indexer.put('T', (Integer) 3);
|
||||
//indexer.put('D', (Integer) 4); // D for deletion
|
||||
|
||||
//Get HLA allele sequence and position given index
|
||||
String allele = HLAreads.get(alleleindex);
|
||||
int allelestart = HLAstartpos[alleleindex], allelestop = HLAstoppos[alleleindex];
|
||||
|
||||
//Finds SNPs in allele
|
||||
for (int i = allelestart; i <= allelestop; i++){
|
||||
if (SNPs.containsKey(String.valueOf(i))){
|
||||
//Stores matrix index
|
||||
SNPsInRead.add((Integer) SNPs.get(String.valueOf(i)));
|
||||
//stores position along read
|
||||
readindex.add((Integer) i - allelestart);
|
||||
//stores genotypes at SNPs
|
||||
Genotypes.add(SNPchars.get((Integer) SNPs.get(String.valueOf(i))-1));
|
||||
}
|
||||
}
|
||||
|
||||
char c1, c2, gi1, gi2, gj1, gj2;
|
||||
int a, b, a1, a2, b1, b2;
|
||||
double numerator, denominator;
|
||||
//sum score for every pair of SNPs in the allele
|
||||
for (int i = 0; i < SNPsInRead.size()-1; i++){
|
||||
int j = i + 1;
|
||||
c1 = allele.charAt((int) readindex.get(i));
|
||||
c2 = allele.charAt((int) readindex.get(j));
|
||||
|
||||
gi1 = Genotypes.get(i).toCharArray()[0];
|
||||
gi2 = Genotypes.get(i).toCharArray()[1];
|
||||
gj1 = Genotypes.get(j).toCharArray()[0];
|
||||
gj2 = Genotypes.get(j).toCharArray()[1];
|
||||
|
||||
numerator = 0; denominator = 0;
|
||||
if (indexer.get(c1) != null && indexer.get(c2) != null){
|
||||
a = (SNPsInRead.get(i)-1)*5;
|
||||
b = (SNPsInRead.get(j)-1)*5;
|
||||
for (int k = 0; k < 5; k++){
|
||||
for (int l = 0; l < 5; l++){
|
||||
if (DEBUG){}//out.printf("[%s,%s]=%s, sum=%s\n",a+k,b+l,SNPcorrelation[a+k][b+l],denominator);}
|
||||
denominator = denominator + SNPcorrelation[a+k][b+l];
|
||||
}
|
||||
}
|
||||
if (denominator > 0){
|
||||
//indicies for genotypes at the 2 SNPs
|
||||
a1 = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(gi1);
|
||||
b1 = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(gj1);
|
||||
a2 = (SNPsInRead.get(i)-1)*5 + (Integer) indexer.get(gi2);
|
||||
b2 = (SNPsInRead.get(j)-1)*5 + (Integer) indexer.get(gj2);
|
||||
|
||||
if (gi1 == gi2 && gj1 == gj2){
|
||||
if (c1 == gi1 && c2 == gj1){
|
||||
numerator = SNPcorrelation[a1][b1];
|
||||
}
|
||||
} else if ((c1 == gi1 && c2 == gj1) || (c1 == gi2 && c2 == gj2)){
|
||||
numerator = SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2];
|
||||
} else if((c1 == gi2 && c2 == gj1) || (c1 == gi1 && c2 == gj2)){
|
||||
numerator = SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2];
|
||||
} else {
|
||||
if ((SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2]) > (SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2])){
|
||||
numerator = denominator - (SNPcorrelation[a1][b1] + SNPcorrelation[a2][b2]);
|
||||
}else{
|
||||
numerator = denominator - (SNPcorrelation[a2][b1] + SNPcorrelation[a1][b2]);
|
||||
}
|
||||
}
|
||||
|
||||
if (numerator == 0){
|
||||
prob = 0.01;
|
||||
}else{
|
||||
prob = prob * numerator/denominator;
|
||||
}
|
||||
|
||||
if (DEBUG){
|
||||
out.printf("%s: %s,%s\tC1,C2=[%s,%s]\t[%s%s]=%s\t[%s%s]=%s\t[%s%s]=%s\t[%s%s]=%s\t%s/%s=%.3f\tprob=%.3f\n",readindex.get(i)+allelestart,readindex.get(j)+allelestart,alleleindex,c1,c2,gi1,gj1,SNPcorrelation[a1][b1],gi1,gj2,SNPcorrelation[a1][b2],gi2,gj1,SNPcorrelation[a2][b1],gi2,gj2,SNPcorrelation[a2][b2],numerator,denominator,numerator/denominator,prob);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return prob;
|
||||
}
|
||||
|
||||
public boolean isReduceByInterval() {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -509,13 +613,13 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
|
||||
ArrayList<Integer> TopAlleles = new ArrayList<Integer>();
|
||||
|
||||
double maxA = 0; int i_maxA =0; int j_maxA = 0; int maxAphase = 0; double maxAfreq = 0.0; double maxlikelihoodA = 0.0;
|
||||
double maxA = 0; int i_maxA =0; int j_maxA = 0; double maxAphase = 0.0; double maxAfreq = 0.0; double maxlikelihoodA = 0.0; double maxProbA = 0.0;
|
||||
double maxA2 = 0; int i_maxA_2 =0; int j_maxA_2 = 0;
|
||||
|
||||
double maxB = 0; int i_maxB =0; int j_maxB = 0; int maxBphase = 0; double maxBfreq = 0.0; double maxlikelihoodB = 0.0;
|
||||
double maxB = 0; int i_maxB =0; int j_maxB = 0; double maxBphase = 0.0; double maxBfreq = 0.0; double maxlikelihoodB = 0.0; double maxProbB = 0.0;
|
||||
double maxB2 = 0; int i_maxB_2 =0; int j_maxB_2 = 0;
|
||||
|
||||
double maxC = 0; int i_maxC =0; int j_maxC = 0; int maxCphase = 0; double maxCfreq = 0.0; double maxlikelihoodC = 0.0;
|
||||
double maxC = 0; int i_maxC =0; int j_maxC = 0; double maxCphase = 0.0; double maxCfreq = 0.0; double maxlikelihoodC = 0.0; double maxProbC = 0.0;
|
||||
double maxC2 = 0; int i_maxC_2 =0; int j_maxC_2 = 0;
|
||||
|
||||
|
||||
|
|
@ -549,44 +653,41 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
for (Integer i = 0; i < numHLAlleles; i++){
|
||||
for (Integer j = i; j < numHLAlleles; j++){
|
||||
if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1 && maxA > 0){
|
||||
if (maxA - LOD[i][j] <= 5 && maxA - LOD[i][j] >= 0){
|
||||
if (maxA - LOD[i][j] <= 5 && maxA >= LOD[i][j]){
|
||||
inverseMaxProbA = inverseMaxProbA + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA);
|
||||
if (DEBUG){
|
||||
out.printf("HLA-A: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodA,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbA);
|
||||
}
|
||||
}
|
||||
if (LOD[i][j] == maxA){
|
||||
if (!TopAlleles.contains(i)){TopAlleles.add(i);}
|
||||
if (!TopAlleles.contains(j)){TopAlleles.add(j);}
|
||||
if (DEBUG){
|
||||
out.printf("HLA-A: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,LOD[i][j],maxA,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbA);
|
||||
}
|
||||
}
|
||||
} else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1 && maxB > 0){
|
||||
if (maxB - LOD[i][j] <= 5 && maxB - LOD[i][j] >= 0){
|
||||
inverseMaxProbB = inverseMaxProbB + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB);
|
||||
if (DEBUG){
|
||||
out.printf("HLA-B: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodB,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbB);
|
||||
}
|
||||
}
|
||||
if (LOD[i][j] == maxB){
|
||||
if (!TopAlleles.contains(i)){TopAlleles.add(i);}
|
||||
if (!TopAlleles.contains(j)){TopAlleles.add(j);}
|
||||
if (DEBUG){
|
||||
out.printf("HLA-B: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodB,LOD[i][j],maxB,LikelihoodScores[i][j]-maxlikelihoodB,inverseMaxProbB);
|
||||
}
|
||||
}
|
||||
} else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1 && maxC > 0){
|
||||
if (maxC - LOD[i][j] <= 5 && maxC - LOD[i][j] >= 0){
|
||||
inverseMaxProbC = inverseMaxProbC + java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC);
|
||||
if (DEBUG){
|
||||
out.printf("HLA-C: likelihood=%.5f\tmaxlikelihood=%.5f\tdelta_likelihood=%.5f\tinvP=%.5f\n",LikelihoodScores[i][j],maxlikelihoodC,LikelihoodScores[i][j]-maxlikelihoodA,inverseMaxProbC);
|
||||
}
|
||||
}
|
||||
if (LOD[i][j] == maxC){
|
||||
if (!TopAlleles.contains(i)){TopAlleles.add(i);}
|
||||
if (!TopAlleles.contains(j)){TopAlleles.add(j);}
|
||||
if (DEBUG){
|
||||
out.printf("HLA-C: %s, %s \tlikelihood=%.2f\tmax=%.2f\tLOD=%.2f\tmaxLOD=%.2f\tdelta_likelihood=%.2f\tinvP=%.2f\n",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodC,LOD[i][j],maxC,LikelihoodScores[i][j]-maxlikelihoodC,inverseMaxProbC);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out.printf("\nCalculating SNP correlation matrix for %s SNPs\n",SNPcount);
|
||||
SNPcorrelation = new int[SNPs.size()*5][SNPs.size()*5];
|
||||
SNPcorrelation = new int[SNPs.size()*5][SNPs.size()*5]; //keep track of counts of each pair of SNPs
|
||||
SNPcorrelationProb = new double[SNPs.size()][3]; // keep track of probabilities for specific haplotype at 2 SNPs.
|
||||
|
||||
|
||||
|
||||
//Create correlation matrix and update correlation scores for all reads
|
||||
|
|
@ -633,12 +734,14 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
out.printf("Calculating phase scores for %s top alleles\n",TopAlleles.size());
|
||||
|
||||
//Calculate Phase score for each allele
|
||||
int[] SinglePhaseScores = new int[TopAlleles.size()];
|
||||
double[] SinglePhaseScores = new double[TopAlleles.size()];
|
||||
for (int i = 0; i < TopAlleles.size(); i++){
|
||||
SinglePhaseScores[i] = GetPhaseScore(TopAlleles.get(i));
|
||||
//SinglePhaseScores[i] = GetPhaseScore(TopAlleles.get(i));
|
||||
SinglePhaseScores[i] = GetPhaseProbability(TopAlleles.get(i));
|
||||
|
||||
if ( DEBUG ){
|
||||
//Debugging: print list of alleles to be checked for phasing
|
||||
out.printf("index=%s\t%s\tscore=%s\n",TopAlleles.get(i),HLAnames.get(TopAlleles.get(i)),SinglePhaseScores[i]);
|
||||
out.printf("index=%s\t%s\tscore=%.3f\n",TopAlleles.get(i),HLAnames.get(TopAlleles.get(i)),SinglePhaseScores[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -647,68 +750,83 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
String alleleA, alleleB;
|
||||
Double freq1 = 0.0, freq2 = 0.0;
|
||||
Double ProbSumA = 0.0, ProbSumB = 0.0, ProbSumC = 0.0, likelihoodPrior;
|
||||
Double PhaseSumA = 0.0, PhaseSumB = 0.0, PhaseSumC = 0.0;
|
||||
for (Integer i = 0; i < numHLAlleles; i++){
|
||||
for (Integer j = i; j < numHLAlleles; j++){
|
||||
if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1){
|
||||
if (LOD[i][j] == maxA && maxA > 0){
|
||||
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)];
|
||||
if ((LOD[i][j] >= maxA - 5) && LOD[i][j] > 0){
|
||||
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)];
|
||||
if (PhasingScores[i][j] > maxAphase){maxAphase = PhasingScores[i][j];}
|
||||
alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;}
|
||||
alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;}
|
||||
SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2;
|
||||
if (CombinedAlleleFrequencies[i][j] > maxAfreq){maxAfreq = CombinedAlleleFrequencies[i][j];}
|
||||
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA)/inverseMaxProbA;
|
||||
ProbSumA = ProbSumA + likelihoodPrior*CombinedAlleleFrequencies[i][j];
|
||||
ProbSumA = ProbSumA + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];
|
||||
PhaseSumA = PhaseSumA + PhasingScores[i][j];
|
||||
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbA){maxProbA = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];}
|
||||
}
|
||||
} else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1){
|
||||
if (LOD[i][j] == maxB && maxB > 0){
|
||||
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)];
|
||||
if ((LOD[i][j] >= maxB - 5) && LOD[i][j] > 0){
|
||||
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)];
|
||||
if (PhasingScores[i][j] > maxBphase){maxBphase = PhasingScores[i][j];}
|
||||
alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;}
|
||||
alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;}
|
||||
SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2;
|
||||
if (freq1*freq2 > maxBfreq){maxBfreq = freq1*freq2;}
|
||||
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB)/inverseMaxProbB;
|
||||
ProbSumB = ProbSumB + likelihoodPrior*CombinedAlleleFrequencies[i][j];
|
||||
ProbSumB = ProbSumB + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];
|
||||
PhaseSumB = PhaseSumB + PhasingScores[i][j];
|
||||
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbB){maxProbB = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];}
|
||||
}
|
||||
} else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1){
|
||||
if (LOD[i][j] == maxC && maxC > 0){
|
||||
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] + SinglePhaseScores[TopAlleles.indexOf(j)];
|
||||
if ((LOD[i][j] >= maxC - 5)&& LOD[i][j] > 0){
|
||||
|
||||
PhasingScores[i][j]= SinglePhaseScores[TopAlleles.indexOf(i)] * SinglePhaseScores[TopAlleles.indexOf(j)];
|
||||
if (PhasingScores[i][j] > maxCphase){maxCphase = PhasingScores[i][j];}
|
||||
alleleA=HLAnames.get(i).substring(4); if (AlleleFrequencies.containsKey(alleleA)){freq1 = Double.parseDouble((String) AlleleFrequencies.get(alleleA).toString());}else{freq1=0.0001;}
|
||||
alleleB=HLAnames.get(j).substring(4); if (AlleleFrequencies.containsKey(alleleB)){freq2 = Double.parseDouble((String) AlleleFrequencies.get(alleleB).toString());}else{freq2=0.0001;}
|
||||
SingleAlleleFrequencies[i]=freq1; SingleAlleleFrequencies[j]=freq2; CombinedAlleleFrequencies[i][j]=freq1*freq2;
|
||||
if (freq1*freq2 > maxCfreq){maxCfreq = freq1*freq2;}
|
||||
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC)/inverseMaxProbC;
|
||||
ProbSumC = ProbSumC + likelihoodPrior*CombinedAlleleFrequencies[i][j];
|
||||
ProbSumC = ProbSumC + likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];
|
||||
PhaseSumC = PhaseSumC + PhasingScores[i][j];
|
||||
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] > maxProbC){maxProbC = likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j];}
|
||||
if (DEBUG){
|
||||
out.printf("DEBUG: %s\t%s\tPhaseScore[%.3f]\t%.3f\n",alleleA,alleleB,PhasingScores[i][j],PhaseSumC);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (DEBUG){
|
||||
out.printf("DEBUG: Phase sum for HLA-C: %.3f\n",PhaseSumC);
|
||||
}
|
||||
out.print("Printing results ...\n");
|
||||
//Print allele pairs with highest likelihood score and highest phasing score
|
||||
|
||||
for (Integer i = 0; i < numHLAlleles; i++){
|
||||
for (Integer j = i; j < numHLAlleles; j++){
|
||||
if (HLAnames.get(i).indexOf("HLA_A") > -1 && HLAnames.get(j).indexOf("HLA_A") > -1){
|
||||
if(LOD[i][j] == maxA && maxA > 0 && PhasingScores[i][j] == maxAphase){
|
||||
if((LOD[i][j] >= maxA - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxAphase
|
||||
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodA)/inverseMaxProbA;
|
||||
out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,inverseMaxProbA,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumA);
|
||||
if (CombinedAlleleFrequencies[i][j] == maxAfreq){out.printf("\tBEST");}
|
||||
//out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%.3f\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodA,inverseMaxProbA,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumA);
|
||||
out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumA,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumA);
|
||||
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbA){out.printf("\tBEST");}
|
||||
out.printf("\n");
|
||||
}
|
||||
} else if (HLAnames.get(i).indexOf("HLA_B") > -1 && HLAnames.get(j).indexOf("HLA_B") > -1){
|
||||
if(LOD[i][j] == maxB && maxB > 0 && PhasingScores[i][j] == maxBphase){
|
||||
if((LOD[i][j] >= maxB - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxBphase
|
||||
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodB)/inverseMaxProbB;
|
||||
out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodB,inverseMaxProbB,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumB);
|
||||
if (CombinedAlleleFrequencies[i][j] == maxBfreq){out.printf("\tBEST");}
|
||||
out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumB,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumB);
|
||||
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbB){out.printf("\tBEST");}
|
||||
out.printf("\n");
|
||||
}
|
||||
} else if (HLAnames.get(i).indexOf("HLA_C") > -1 && HLAnames.get(j).indexOf("HLA_C") > -1){
|
||||
if(LOD[i][j] == maxC && maxC > 0 && PhasingScores[i][j] == maxCphase){
|
||||
if((LOD[i][j] >= maxC - 5) && LOD[i][j] > 0){ // && PhasingScores[i][j] == maxCphase
|
||||
likelihoodPrior = java.lang.Math.pow(10,LikelihoodScores[i][j]-maxlikelihoodC)/inverseMaxProbC;
|
||||
out.printf("%s\t%s\tloglikelihood=%5.0f\tmax=%5.0f\tinvP=%.2f\tPrior=%.3f\tPhase=%s\tfreq1=%s\tfreq2=%s\tf1*f2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],maxlikelihoodC,inverseMaxProbC,likelihoodPrior,PhasingScores[i][j],SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]/ProbSumC);
|
||||
if (CombinedAlleleFrequencies[i][j] == maxCfreq){out.printf("\tBEST");}
|
||||
out.printf("%s\t%s\tloglikelihood=%5.0f\tP(SSG)=%.3f\tP(Phase)=%.3f\tF1=%s\tF2=%s\tF1*F2=%.8f\tProb=%.3f",HLAnames.get(i),HLAnames.get(j),LikelihoodScores[i][j],likelihoodPrior,PhasingScores[i][j]/PhaseSumC,SingleAlleleFrequencies[i],SingleAlleleFrequencies[j],CombinedAlleleFrequencies[i][j],likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j]/ProbSumC);
|
||||
if (likelihoodPrior*CombinedAlleleFrequencies[i][j]*PhasingScores[i][j] == maxProbC){out.printf("\tBEST");}
|
||||
out.printf("\n");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue