a) Small cleanup
b) Fix major issue with Beagle likelihood converter: if likelihood triplets from UG end up being too low, then Beagle input file will be produced with 0.00,0.00,0.00 triplet. If all samples at a marker have this issue, Beagle will effectively produce junk. To fix, likelihoods are renormalized before converting to linear space. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3491 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cfa18f6743
commit
d4c66d6191
|
|
@ -192,6 +192,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
String markerKey = phasedLine[1];
|
||||
|
||||
HashMap<String,HaplotypePair> haplotypePairs = new HashMap<String,HaplotypePair>();
|
||||
System.out.println(markerKey);
|
||||
|
||||
j = 2;
|
||||
for (String sample : samples) {
|
||||
|
|
@ -235,30 +236,8 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
return allThere;
|
||||
|
||||
}
|
||||
/*
|
||||
|
||||
|
||||
private class MyFileReader {
|
||||
private FileReader reader;
|
||||
private String fileName;
|
||||
public MyFileReader(String fileName) {
|
||||
try{
|
||||
reader = new FileReader(fileName);
|
||||
} catch ( FileNotFoundException e) {
|
||||
throw new StingException("Could not find required input file: " + fileName);
|
||||
}
|
||||
|
||||
this.fileName = fileName;
|
||||
|
||||
}
|
||||
public String GetNextLine() {
|
||||
String line;
|
||||
int x = reader.read();
|
||||
line.
|
||||
//reader.read()
|
||||
}
|
||||
} */
|
||||
|
||||
private class BeagleFileReader {
|
||||
private String headerString;
|
||||
private BufferedReader reader;
|
||||
|
|
|
|||
|
|
@ -90,9 +90,6 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Get Reference base for this site: will be output to screen, not directly used by Beagle but rather by output analysis tools
|
||||
// char re = (char)ref.getBase();
|
||||
|
||||
// output marker ID to Beagle input file
|
||||
beagleWriter.print(String.format("%s ",vc_eval.getLocation().toString()));
|
||||
|
||||
|
|
@ -117,10 +114,21 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
if (genotype.isCalled() && genotype.hasAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
String[] glArray = genotype.getAttributeAsString(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY).split(",");
|
||||
|
||||
Double maxLikelihood = -100.0;
|
||||
ArrayList<Double> likeArray = new ArrayList<Double>();
|
||||
|
||||
for (String gl : glArray) {
|
||||
Double d_gl = 100*Math.pow(10, Double.valueOf(gl));
|
||||
beagleWriter.print(String.format("%5.2f ",d_gl));
|
||||
}
|
||||
// need to normalize likelihoods to avoid precision loss. In worst case, if all 3 log-likelihoods are too
|
||||
// small, we could end up with linear likelihoods of form 0.00 0.00 0.00 which will mess up imputation.
|
||||
Double dg = Double.valueOf(gl);
|
||||
if (dg> maxLikelihood)
|
||||
maxLikelihood = dg;
|
||||
|
||||
likeArray.add(dg);
|
||||
}
|
||||
|
||||
for (Double likeVal: likeArray)
|
||||
beagleWriter.print(String.format("%5.4f ",Math.pow(10, likeVal-maxLikelihood)));
|
||||
}
|
||||
else
|
||||
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.
|
||||
|
|
|
|||
Loading…
Reference in New Issue