Updated documentation

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2364 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
sjia 2009-12-15 20:41:35 +00:00
parent d8cfd707bc
commit 5974c42468
1 changed files with 51 additions and 31 deletions

View File

@ -1,6 +1,6 @@
/* /*
* To change this template, choose Tools | Templates * Calculates likelihood of observing the data given pairs of HLA alleles. NOTE: run CalculateBaseLikelihoods first! Usage: java -jar GenomeAnalysisTK.jar -T CalculateAlleleLikelihoods -I /humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.imputed.4digit.bam -R /broad/1KG/reference/human_b36_both.fasta -L /humgen/gsa-scr1/GSA/sjia/454_HLA/HAPMAP270/HLA_exons.interval -bl INPUT.baselikelihoods -eth\
* and open the template in the editor. nicity Caucasian | grep -v "INFO" | grep -v "DONE!" > OUTPUT.allelelikelihoods
*/ */
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
@ -24,16 +24,20 @@ public class CalculateAlleleLikelihoodsWalker extends ReadWalker<Integer, Intege
@Argument(fullName = "debugHLA", shortName = "debugHLA", doc = "Print debug", required = false) @Argument(fullName = "debugHLA", shortName = "debugHLA", doc = "Print debug", required = false)
public boolean DEBUG = false; public boolean DEBUG = false;
@Argument(fullName = "debugAlleles", shortName = "debugAlleles", doc = "Print likelihood scores for these alleles", required = false)
public String debugAlleles = "";
@Argument(fullName = "onlyfrequent", shortName = "onlyfrequent", doc = "Only consider alleles with frequency > 0.0001", required = false) @Argument(fullName = "onlyfrequent", shortName = "onlyfrequent", doc = "Only consider alleles with frequency > 0.0001", required = false)
public boolean FREQUENT = false; public boolean FREQUENT = false;
@Argument(fullName = "ethnicity", shortName = "ethnicity", doc = "Use allele frequencies for this ethnic group", required = false) @Argument(fullName = "ethnicity", shortName = "ethnicity", doc = "Use allele frequencies for this ethnic group", required = false)
public String ethnicity = "Caucasian"; public String ethnicity = "CaucasianUSA";
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq"; String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq"; String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
String AlleleFrequencyFile; String AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
Hashtable AlleleFrequencies; String UniqueAllelesFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/UniqueAlleles";
Hashtable AlleleFrequencies,UniqueAlleles;
CigarParser formatter = new CigarParser(); CigarParser formatter = new CigarParser();
double[][] baseLikelihoods; double[][] baseLikelihoods;
@ -58,15 +62,14 @@ public class CalculateAlleleLikelihoodsWalker extends ReadWalker<Integer, Intege
HLAstartposAL = new ArrayList<Integer>(); HLAstartposAL = new ArrayList<Integer>();
HLAstopposAL = new ArrayList<Integer>(); HLAstopposAL = new ArrayList<Integer>();
if (ethnicity.equals("Black")){ if (!ethnicity.equals("CaucasianUSA")){
AlleleFrequencyFile = BlackAlleleFrequencyFile; AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_" + ethnicity + ".freq";
}else{
AlleleFrequencyFile = CaucasianAlleleFrequencyFile;
} }
out.printf("INFO Reading HLA allele frequencies ... "); out.printf("INFO Reading HLA allele frequencies ... ");
FrequencyFileReader HLAfreqReader = new FrequencyFileReader(); FrequencyFileReader HLAfreqReader = new FrequencyFileReader();
HLAfreqReader.ReadFile(AlleleFrequencyFile); HLAfreqReader.ReadFile(AlleleFrequencyFile,UniqueAllelesFile);
AlleleFrequencies = HLAfreqReader.GetAlleleFrequencies(); AlleleFrequencies = HLAfreqReader.GetAlleleFrequencies();
UniqueAlleles = HLAfreqReader.GetUniqueAlleles();
out.printf("Done! Frequencies for %s HLA alleles loaded.\n",AlleleFrequencies.size()); out.printf("Done! Frequencies for %s HLA alleles loaded.\n",AlleleFrequencies.size());
out.printf("INFO Reading HLA dictionary ..."); out.printf("INFO Reading HLA dictionary ...");
@ -141,39 +144,57 @@ public class CalculateAlleleLikelihoodsWalker extends ReadWalker<Integer, Intege
} }
int numcombinations = 0; int numcombinations = 0;
out.printf("NUM\tAllele1\tAllele2\tSSG\n"); out.printf("NUM\tAllele1\tAllele2\tSSG\n");
int index1 = -1, index2 = -1;
if (!debugAlleles.equals("")){
String s[] = debugAlleles.split(",");
for (int i = 0; i < numreads; i++){
if (HLAnames[i].equals(s[0])){
index1 = i;
}
if (HLAnames[i].equals(s[1])){
index2 = i;
}
if (index1 > -1 && index2 > -1){
out.printf("INFO: debugging %s\t%s\t%s\t%s\n",s[0],s[1],index1,index2);
double dl = CalculateLikelihood(index1,index2,true);
break;
}
}
}
for (int i = 0; i < numreads; i++){ for (int i = 0; i < numreads; i++){
name1 = HLAnames[i].substring(4); name1 = HLAnames[i].substring(4);
if (AlleleFrequencies.containsKey(name1)){ if (UniqueAlleles.containsKey(name1)){
frq1 = Double.parseDouble((String) AlleleFrequencies.get(name1).toString()); //frq1 = Double.parseDouble((String) AlleleFrequencies.get(name1).toString());
if (frq1 > minfrq){ //if (frq1 > minfrq){
for (int j = i; j < numreads; j++){ for (int j = i; j < numreads; j++){
name2 = HLAnames[j].substring(4); name2 = HLAnames[j].substring(4);
if (AlleleFrequencies.containsKey(name2)){ if (UniqueAlleles.containsKey(name2)){
frq2 = Double.parseDouble((String) AlleleFrequencies.get(name2).toString()); // frq2 = Double.parseDouble((String) AlleleFrequencies.get(name2).toString());
if (frq2 > minfrq){ // if (frq2 > minfrq){
if ((HLAstartpos[i] < HLAstoppos[j]) && (HLAstartpos[j] < HLAstoppos[i])){ if ((HLAstartpos[i] < HLAstoppos[j]) && (HLAstartpos[j] < HLAstoppos[i])){
numcombinations++; numcombinations++;
AlleleLikelihoods[i][j] = CalculateLikelihood(i,j); AlleleLikelihoods[i][j] = CalculateLikelihood(i,j,false);
out.printf("%s\t%s\t%s\t%.2f\n",numcombinations,name1,name2,AlleleLikelihoods[i][j]); out.printf("%s\t%s\t%s\t%.2f\n",numcombinations,name1,name2,AlleleLikelihoods[i][j]);
} }
}else{ // }else{
if (DEBUG){out.printf("%s has allele frequency%.5f\n",name2,frq2);} // if (DEBUG){out.printf("%s has allele frequency%.5f\n",name2,frq2);}
} // }
}else{ // }else{
if (DEBUG){out.printf("%s not found in allele frequency file\n",name2);} // if (DEBUG){out.printf("%s not found in allele frequency file\n",name2);}
} }
} }
}else{ //}else{
if (DEBUG){out.printf("%s has allele frequency%.5f\n",name1,frq1);} // if (DEBUG){out.printf("%s has allele frequency%.5f\n",name1,frq1);}
} //}
}else{ }else{
if (DEBUG){out.printf("%s not found in allele frequency file\n",name1);} if (DEBUG){out.printf("%s not found in allele frequency file\n",name1);}
} }
} }
} }
private double CalculateLikelihood(int a1, int a2){ private double CalculateLikelihood(int a1, int a2, boolean debug){
String read1 = HLAreads[a1]; String read1 = HLAreads[a1];
String read2 = HLAreads[a2]; String read2 = HLAreads[a2];
int start1 = HLAstartpos[a1]; int start1 = HLAstartpos[a1];
@ -191,12 +212,11 @@ public class CalculateAlleleLikelihoodsWalker extends ReadWalker<Integer, Intege
index = GenotypeIndex(read1.charAt(pos-start1),read2.charAt(pos-start2)); index = GenotypeIndex(read1.charAt(pos-start1),read2.charAt(pos-start2));
if (index > -1){ if (index > -1){
likelihood = likelihood + baseLikelihoods[i][index]; likelihood = likelihood + baseLikelihoods[i][index];
}else{ if (debug){
/*if (DEBUG){
c1 = read1.charAt(pos-start1); c1 = read1.charAt(pos-start1);
c2 = read2.charAt(pos-start2); c2 = read2.charAt(pos-start2);
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%.2f\n",HLAnames[a1],HLAnames[a2],pos,c1,c2,index,likelihood); out.printf("INFO: DEBUG %s\t%s\t%s\t%s\t%s\t%s\t%.2f\n",HLAnames[a1],HLAnames[a2],pos,c1,c2,index,likelihood);
}*/ }
} }
} }
} }