Handles allele frequencies for any specified population, changed user input for mismatch filter options
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1909 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
db9419df49
commit
24c7f694e6
|
|
@ -28,14 +28,16 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
|||
@Argument(fullName = "debugAlleles", shortName = "debugAlleles", doc = "Print likelihood scores for these alleles", required = false)
|
||||
public String inputAlleles = "";
|
||||
@Argument(fullName = "ethnicity", shortName = "ethnicity", doc = "Use allele frequencies for this ethnic group", required = false)
|
||||
public String ethnicity = "Caucasian";
|
||||
public String ethnicity = "CaucasianUSA";
|
||||
@Argument(fullName = "filter", shortName = "filter", doc = "file containing reads to exclude", required = false)
|
||||
public String filterFile = "";
|
||||
@Argument(fullName = "minAllowedMismatches", shortName = "minAllowedMismatches", doc = "Min number of mismatches tolerated per read (default 7)", required = false)
|
||||
public int MINALLOWEDMISMATCHES = 7;
|
||||
|
||||
String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.imputed.4digit.sam";
|
||||
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
|
||||
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
|
||||
String AlleleFrequencyFile;
|
||||
String AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
|
||||
|
||||
SAMFileReader HLADictionaryReader = new SAMFileReader();
|
||||
String[] HLAreads, HLAnames;
|
||||
|
|
@ -67,10 +69,8 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
|||
|
||||
|
||||
|
||||
if (ethnicity.equals("Black")){
|
||||
AlleleFrequencyFile = BlackAlleleFrequencyFile;
|
||||
}else{
|
||||
AlleleFrequencyFile = CaucasianAlleleFrequencyFile;
|
||||
if (!ethnicity.equals("CaucasianUSA")){
|
||||
AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_" + ethnicity + ".freq";
|
||||
}
|
||||
out.printf("INFO Reading HLA allele frequencies ... ");
|
||||
FrequencyFileReader HLAfreqReader = new FrequencyFileReader();
|
||||
|
|
@ -82,7 +82,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
|||
if (!filterFile.equals("")){
|
||||
out.printf("INFO Reading properties file ... ");
|
||||
SimilarityFileReader similarityReader = new SimilarityFileReader();
|
||||
similarityReader.ReadFile(filterFile);
|
||||
similarityReader.ReadFile(filterFile,MINALLOWEDMISMATCHES);
|
||||
ReadsToDiscard = similarityReader.GetReadsToDiscard();
|
||||
out.printf("Done! Found %s misaligned reads to discard.\n",ReadsToDiscard.size());
|
||||
for (int i = 0; i < ReadsToDiscard.size(); i++){
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
|||
public String filterFile = "";
|
||||
|
||||
@Argument(fullName = "ethnicity", shortName = "ethnicity", doc = "Use allele frequencies for this ethnic group", required = false)
|
||||
public String ethnicity = "Caucasian";
|
||||
public String ethnicity = "CaucasiansUSA";
|
||||
|
||||
@Argument(fullName = "debugAlleles", shortName = "debugAlleles", doc = "Print likelihood scores for these alleles", required = false)
|
||||
public String debugAlleles = "";
|
||||
|
|
@ -39,11 +39,14 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
|||
@Argument(fullName = "onlyfrequent", shortName = "onlyfrequent", doc = "Only consider alleles with frequency > 0.0001", required = false)
|
||||
public boolean ONLYFREQUENT = false;
|
||||
|
||||
@Argument(fullName = "minAllowedMismatches", shortName = "minAllowedMismatches", doc = "Min number of mismatches tolerated per read (default 7)", required = false)
|
||||
public int MINALLOWEDMISMATCHES = 7;
|
||||
|
||||
GATKArgumentCollection args = this.getToolkit().getArguments();
|
||||
|
||||
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
|
||||
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
|
||||
String AlleleFrequencyFile;
|
||||
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
|
||||
String AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
|
||||
|
||||
String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.nuc.imputed.4digit.sam";
|
||||
SAMFileReader HLADictionaryReader = new SAMFileReader();
|
||||
|
|
@ -59,6 +62,11 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
|||
Hashtable AlleleFrequencies;
|
||||
int numIntervals;
|
||||
|
||||
double P_err = 0.01;
|
||||
double P_correct = 1 - P_err;
|
||||
double L_err = Math.log10(P_err);
|
||||
double L_correct = Math.log10(P_correct);
|
||||
|
||||
public Integer reduceInit() {
|
||||
|
||||
if (!HLAdataLoaded){
|
||||
|
|
@ -76,7 +84,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
|||
if (!filterFile.equals("")){
|
||||
out.printf("INFO Reading properties file ... ");
|
||||
SimilarityFileReader similarityReader = new SimilarityFileReader();
|
||||
similarityReader.ReadFile(filterFile);
|
||||
similarityReader.ReadFile(filterFile,MINALLOWEDMISMATCHES);
|
||||
ReadsToDiscard = similarityReader.GetReadsToDiscard();
|
||||
out.printf("Done! Found %s misaligned reads to discard.\n",ReadsToDiscard.size());
|
||||
}
|
||||
|
|
@ -102,12 +110,10 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
|||
totalObservations = new int[l][l];
|
||||
|
||||
//Read allele frequencies
|
||||
if (ethnicity.equals("Black")){
|
||||
AlleleFrequencyFile = BlackAlleleFrequencyFile;
|
||||
}else{
|
||||
AlleleFrequencyFile = CaucasianAlleleFrequencyFile;
|
||||
if (!ethnicity.equals("CaucasianUSA")){
|
||||
AlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_" + ethnicity + ".freq";
|
||||
}
|
||||
out.printf("INFO Reading HLA allele frequencies ... ");
|
||||
out.printf("INFO Reading HLA allele frequencies for %s population ... ", ethnicity);
|
||||
FrequencyFileReader HLAfreqReader = new FrequencyFileReader();
|
||||
HLAfreqReader.ReadFile(AlleleFrequencyFile);
|
||||
AlleleFrequencies = HLAfreqReader.GetAlleleFrequencies();
|
||||
|
|
@ -295,7 +301,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
|||
int numPositions = PolymorphicSites.length, SNPcount = 0;
|
||||
int i, j, a1, a2, b1, b2;
|
||||
char c11, c12, c21, c22;
|
||||
int numInPhase = 0;
|
||||
int numInPhase = 0, numOutOfPhase = 0;
|
||||
double sumInPhase = 0.0, sumObservations = 0.0;
|
||||
|
||||
|
||||
|
|
@ -334,15 +340,18 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
|||
}else{
|
||||
numInPhase = numObservations[a1][b1] + numObservations[a2][b2];
|
||||
}
|
||||
prob = Math.max((double) numInPhase / (double) totalObservations[i][j], 0.0001);
|
||||
numOutOfPhase = totalObservations[i][j] - numInPhase;
|
||||
sumInPhase += (double) numInPhase;
|
||||
sumObservations += (double) totalObservations[i][j];
|
||||
|
||||
likelihood += Math.log10(prob);
|
||||
likelihood += numInPhase * L_correct + numOutOfPhase * L_err;
|
||||
|
||||
//prob = Math.max((double) numInPhase / (double) totalObservations[i][j], 0.0001);
|
||||
//likelihood += Math.log10(prob);
|
||||
//likelihood = Math.max(Math.log10(sumInPhase / sumObservations),-10);
|
||||
|
||||
if (PRINTDEBUG){
|
||||
out.printf("DEBUG %s %s %s[%s%s] %s[%s%s]\t[%s,%s]\t[%s,%s] [%s,%s]\t%s / %s\t%s / %s\t%.4f\t%.2f\n",HLAnames[alleleIndex1],HLAnames[alleleIndex2],PolymorphicSites[i],c11,c21,PolymorphicSites[j],c12,c22, i,j,a1,b1,a2,b2,numInPhase,totalObservations[i][j],sumInPhase,sumObservations,prob,likelihood);
|
||||
out.printf("DEBUG %s %s %s[%s%s] %s[%s%s]\t[%s,%s]\t[%s,%s] [%s,%s]\t%s / %s\t%s / %s\t %.2f\n",HLAnames[alleleIndex1],HLAnames[alleleIndex2],PolymorphicSites[i],c11,c21,PolymorphicSites[j],c12,c22, i,j,a1,b1,a2,b2,numInPhase,totalObservations[i][j],sumInPhase,sumObservations,likelihood);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public class SimilarityFileReader {
|
|||
return ReadsToDiscard.toArray(new String[ReadsToDiscard.size()]);
|
||||
}
|
||||
|
||||
public void ReadFile(String filename){
|
||||
public void ReadFile(String filename, int minAllowedMismatches){
|
||||
try{
|
||||
FileInputStream fstream = new FileInputStream(filename);
|
||||
DataInputStream in = new DataInputStream(fstream);
|
||||
|
|
@ -35,7 +35,7 @@ public class SimilarityFileReader {
|
|||
if (s.length >= 6){
|
||||
Double matchFraction = Double.valueOf(s[4]);
|
||||
int numMismatches = Integer.valueOf(s[6]);
|
||||
if ((matchFraction < 0.9 && numMismatches > 3) || (numMismatches >= 6)){
|
||||
if ((matchFraction < 0.9 && numMismatches > 3) || (numMismatches > minAllowedMismatches)){
|
||||
ReadsToDiscard.add(s[0]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue