a) Small cleanup

b) Fix major issue with Beagle likelihood converter: if likelihood triplets from UG end up being too low, then Beagle input file will be produced with 0.00,0.00,0.00 triplet. If all samples at a marker have this issue, Beagle will effectively produce junk. To fix, likelihoods are renormalized before converting to linear space.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3491 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-06-06 17:31:59 +00:00
parent cfa18f6743
commit d4c66d6191
2 changed files with 15 additions and 28 deletions

View File

@ -192,6 +192,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
String markerKey = phasedLine[1];
HashMap<String,HaplotypePair> haplotypePairs = new HashMap<String,HaplotypePair>();
System.out.println(markerKey);
j = 2;
for (String sample : samples) {
@ -235,30 +236,8 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
return allThere;
}
/*
private class MyFileReader {
private FileReader reader;
private String fileName;
public MyFileReader(String fileName) {
try{
reader = new FileReader(fileName);
} catch ( FileNotFoundException e) {
throw new StingException("Could not find required input file: " + fileName);
}
this.fileName = fileName;
}
public String GetNextLine() {
String line;
int x = reader.read();
line.
//reader.read()
}
} */
private class BeagleFileReader {
private String headerString;
private BufferedReader reader;

View File

@ -90,9 +90,6 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
return 0;
}
// Get Reference base for this site: will be output to screen, not directly used by Beagle but rather by output analysis tools
// char re = (char)ref.getBase();
// output marker ID to Beagle input file
beagleWriter.print(String.format("%s ",vc_eval.getLocation().toString()));
@ -117,10 +114,21 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
if (genotype.isCalled() && genotype.hasAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY)) {
String[] glArray = genotype.getAttributeAsString(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY).split(",");
Double maxLikelihood = -100.0;
ArrayList<Double> likeArray = new ArrayList<Double>();
for (String gl : glArray) {
Double d_gl = 100*Math.pow(10, Double.valueOf(gl));
beagleWriter.print(String.format("%5.2f ",d_gl));
}
// need to normalize likelihoods to avoid precision loss. In worst case, if all 3 log-likelihoods are too
// small, we could end up with linear likelihoods of form 0.00 0.00 0.00 which will mess up imputation.
Double dg = Double.valueOf(gl);
if (dg> maxLikelihood)
maxLikelihood = dg;
likeArray.add(dg);
}
for (Double likeVal: likeArray)
beagleWriter.print(String.format("%5.4f ",Math.pow(10, likeVal-maxLikelihood)));
}
else
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.