Updated documentation

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2366 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
sjia 2009-12-15 20:52:08 +00:00
parent 18f61d2586
commit a80a5f1036
1 changed files with 29 additions and 16 deletions

View File

@ -1,6 +1,6 @@
/* /*
* To change this template, choose Tools | Templates * Calculates the likelihood of observing data given phase info from pairs of HLA alleles. Note: Run FindClosestAlleleWalker first! Usage: java -jar $GATK -T CalculatePhaseLikelihoods -I INPUT.bam -R /broad/1KG/reference/human_b36_both.fasta -L /humgen/gsa-scr1/GSA/sjia/454_HLA/HAPMAP270/HLA_exons.interval -phaseInterval /humgen/gsa-scr1/GSA/sjia/454_HLA/HAPMAP270/HLA_exons.interval -bl IMPUT.baselikelihoods [-filter $ID.filter -minAllowe\
* and open the template in the editor. dMismatches 7] -ethnicity Caucasian | grep -v "INFO" | grep -v "DEBUG" | grep -v "DONE!" > OUTPUT.phaselikelihoods
*/ */
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
@ -47,6 +47,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq"; String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq"; String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
String AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq"; String AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
String UniqueAllelesFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/UniqueAlleles";
String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.imputed.4digit.sam"; String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.imputed.4digit.sam";
SAMFileReader HLADictionaryReader = new SAMFileReader(); SAMFileReader HLADictionaryReader = new SAMFileReader();
@ -59,7 +60,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
int[][] numObservations, totalObservations, intervals; int[][] numObservations, totalObservations, intervals;
int[] SNPnumInRead, SNPposInRead; int[] SNPnumInRead, SNPposInRead;
CigarParser cigarparser = new CigarParser(); CigarParser cigarparser = new CigarParser();
Hashtable AlleleFrequencies; Hashtable AlleleFrequencies, UniqueAlleles;
int numIntervals; int numIntervals;
double P_err = 0.01; double P_err = 0.01;
@ -115,8 +116,9 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
} }
out.printf("INFO Reading HLA allele frequencies for %s population ... ", ethnicity); out.printf("INFO Reading HLA allele frequencies for %s population ... ", ethnicity);
FrequencyFileReader HLAfreqReader = new FrequencyFileReader(); FrequencyFileReader HLAfreqReader = new FrequencyFileReader();
HLAfreqReader.ReadFile(AlleleFrequencyFile); HLAfreqReader.ReadFile(AlleleFrequencyFile,UniqueAllelesFile);
AlleleFrequencies = HLAfreqReader.GetAlleleFrequencies(); AlleleFrequencies = HLAfreqReader.GetAlleleFrequencies();
UniqueAlleles = HLAfreqReader.GetUniqueAlleles();
out.printf("Done! Frequencies for %s HLA alleles loaded.\n",AlleleFrequencies.size()); out.printf("Done! Frequencies for %s HLA alleles loaded.\n",AlleleFrequencies.size());
/* /*
@ -170,7 +172,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
public void onTraversalDone(Integer numreads) { public void onTraversalDone(Integer numreads) {
String name1, name2; String name1, name2;
Double frq1, frq2, likelihood, minfrq = 0.0; Double frq1 = 0.0, frq2 = 0.0, likelihood, minfrq = 0.0;
int numCombinations = 0; int numCombinations = 0;
@ -178,7 +180,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
String s[] = debugAlleles.split(","); String s[] = debugAlleles.split(",");
int index1 = HLADictionaryReader.GetReadIndex(s[0]); int index1 = HLADictionaryReader.GetReadIndex(s[0]);
int index2 = HLADictionaryReader.GetReadIndex(s[1]); int index2 = HLADictionaryReader.GetReadIndex(s[1]);
//out.printf("INFO: debugging %s\t%s\t%s\t%s\n",s[0],s[0],index1,index2); out.printf("INFO: debugging %s\t%s\t%s\t%s\n",s[0],s[1],index1,index2);
if (index1 > -1 && index2 > -1){ if (index1 > -1 && index2 > -1){
likelihood = CalculatePhaseLikelihood(index1,index2,true); likelihood = CalculatePhaseLikelihood(index1,index2,true);
} }
@ -191,23 +193,31 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
out.printf("NUM\tAllele1\tAllele2\tPhase\tFrq1\tFrq2\n"); out.printf("NUM\tAllele1\tAllele2\tPhase\tFrq1\tFrq2\n");
for (int i = 0; i < HLAnames.length; i++){ for (int i = 0; i < HLAnames.length; i++){
name1 = HLAnames[i].substring(4); name1 = HLAnames[i].substring(4);
if (UniqueAlleles.containsKey(name1)){
if (AlleleFrequencies.containsKey(name1)){ if (AlleleFrequencies.containsKey(name1)){
frq1 = Double.parseDouble((String) AlleleFrequencies.get(name1).toString()); frq1 = Double.parseDouble((String) AlleleFrequencies.get(name1).toString());
if (frq1 > minfrq){ }else{
frq1 = .0001;
}
//if (frq1 > minfrq){
for (int j = i; j < HLAnames.length; j++){ for (int j = i; j < HLAnames.length; j++){
name2 = HLAnames[j].substring(4); name2 = HLAnames[j].substring(4);
if (name1.substring(0,1).equals(name2.substring(0,1))){ if (name1.substring(0,1).equals(name2.substring(0,1))){
if (UniqueAlleles.containsKey(name2)){
if (AlleleFrequencies.containsKey(name2)){ if (AlleleFrequencies.containsKey(name2)){
frq2 = Double.parseDouble((String) AlleleFrequencies.get(name2).toString()); frq2 = Double.parseDouble((String) AlleleFrequencies.get(name2).toString());
if (frq2 > minfrq){ }else{
frq2 = .0001;
}
// if (frq2 > minfrq){
likelihood = CalculatePhaseLikelihood(i,j,false); likelihood = CalculatePhaseLikelihood(i,j,false);
numCombinations++; numCombinations++;
out.printf("%s\t%s\t%s\t%.2f\t%.2f\t%.2f\n",numCombinations,name1,name2,likelihood,Math.log10(frq1),Math.log10(frq2)); out.printf("%s\t%s\t%s\t%.2f\t%.2f\t%.2f\n",numCombinations,name1,name2,likelihood,Math.log10(frq1),Math.log10(frq2));
// }
} }
} }
} }
} //}
}
} }
} }
} }
@ -316,9 +326,11 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
SNPposInRead[i] = -1; SNPposInRead[i] = -1;
} }
} }
String s1 = HLAreads[alleleIndex1]; String s1 = HLAreads[alleleIndex1];
String s2 = HLAreads[alleleIndex2]; String s2 = HLAreads[alleleIndex2];
if (PRINTDEBUG){
out.printf("DEBUG %s SNPs found in %s and %s\n",SNPcount,HLAnames[alleleIndex1], HLAnames[alleleIndex2]);
}
//Iterate through every pairwise combination of SNPs, and update likelihood for the allele combination //Iterate through every pairwise combination of SNPs, and update likelihood for the allele combination
for (i = 0; i < numPositions; i++){ for (i = 0; i < numPositions; i++){
if (SNPnumInRead[i] > -1){ if (SNPnumInRead[i] > -1){
@ -353,6 +365,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
if (PRINTDEBUG){ if (PRINTDEBUG){
out.printf("DEBUG %s %s %s[%s%s] %s[%s%s]\t[%s,%s]\t[%s,%s] [%s,%s]\t%s / %s\t%s / %s\t %.2f\n",HLAnames[alleleIndex1],HLAnames[alleleIndex2],PolymorphicSites[i],c11,c21,PolymorphicSites[j],c12,c22, i,j,a1,b1,a2,b2,numInPhase,totalObservations[i][j],sumInPhase,sumObservations,likelihood); out.printf("DEBUG %s %s %s[%s%s] %s[%s%s]\t[%s,%s]\t[%s,%s] [%s,%s]\t%s / %s\t%s / %s\t %.2f\n",HLAnames[alleleIndex1],HLAnames[alleleIndex2],PolymorphicSites[i],c11,c21,PolymorphicSites[j],c12,c22, i,j,a1,b1,a2,b2,numInPhase,totalObservations[i][j],sumInPhase,sumObservations,likelihood);
} }
break;
} }
} }
} }