a) Small cleanup
b) Fix a major issue with the Beagle likelihood converter: if the likelihood triplets from UG are too low, the Beagle input file is written with a 0.00,0.00,0.00 triplet. If all samples at a marker have this issue, Beagle will effectively produce junk. To fix this, the log-likelihoods of each triplet are renormalized relative to the largest value in that triplet before being converted to linear space. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3491 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cfa18f6743
commit
d4c66d6191
|
|
@ -192,6 +192,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
String markerKey = phasedLine[1];
|
String markerKey = phasedLine[1];
|
||||||
|
|
||||||
HashMap<String,HaplotypePair> haplotypePairs = new HashMap<String,HaplotypePair>();
|
HashMap<String,HaplotypePair> haplotypePairs = new HashMap<String,HaplotypePair>();
|
||||||
|
System.out.println(markerKey);
|
||||||
|
|
||||||
j = 2;
|
j = 2;
|
||||||
for (String sample : samples) {
|
for (String sample : samples) {
|
||||||
|
|
@ -235,30 +236,8 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
return allThere;
|
return allThere;
|
||||||
|
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
|
|
||||||
|
|
||||||
private class MyFileReader {
|
|
||||||
private FileReader reader;
|
|
||||||
private String fileName;
|
|
||||||
public MyFileReader(String fileName) {
|
|
||||||
try{
|
|
||||||
reader = new FileReader(fileName);
|
|
||||||
} catch ( FileNotFoundException e) {
|
|
||||||
throw new StingException("Could not find required input file: " + fileName);
|
|
||||||
}
|
|
||||||
|
|
||||||
this.fileName = fileName;
|
|
||||||
|
|
||||||
}
|
|
||||||
public String GetNextLine() {
|
|
||||||
String line;
|
|
||||||
int x = reader.read();
|
|
||||||
line.
|
|
||||||
//reader.read()
|
|
||||||
}
|
|
||||||
} */
|
|
||||||
|
|
||||||
private class BeagleFileReader {
|
private class BeagleFileReader {
|
||||||
private String headerString;
|
private String headerString;
|
||||||
private BufferedReader reader;
|
private BufferedReader reader;
|
||||||
|
|
|
||||||
|
|
@ -90,9 +90,6 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get Reference base for this site: will be output to screen, not directly used by Beagle but rather by output analysis tools
|
|
||||||
// char re = (char)ref.getBase();
|
|
||||||
|
|
||||||
// output marker ID to Beagle input file
|
// output marker ID to Beagle input file
|
||||||
beagleWriter.print(String.format("%s ",vc_eval.getLocation().toString()));
|
beagleWriter.print(String.format("%s ",vc_eval.getLocation().toString()));
|
||||||
|
|
||||||
|
|
@ -117,10 +114,21 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
if (genotype.isCalled() && genotype.hasAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY)) {
|
if (genotype.isCalled() && genotype.hasAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||||
String[] glArray = genotype.getAttributeAsString(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY).split(",");
|
String[] glArray = genotype.getAttributeAsString(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY).split(",");
|
||||||
|
|
||||||
|
Double maxLikelihood = -100.0;
|
||||||
|
ArrayList<Double> likeArray = new ArrayList<Double>();
|
||||||
|
|
||||||
for (String gl : glArray) {
|
for (String gl : glArray) {
|
||||||
Double d_gl = 100*Math.pow(10, Double.valueOf(gl));
|
// need to normalize likelihoods to avoid precision loss. In worst case, if all 3 log-likelihoods are too
|
||||||
beagleWriter.print(String.format("%5.2f ",d_gl));
|
// small, we could end up with linear likelihoods of form 0.00 0.00 0.00 which will mess up imputation.
|
||||||
}
|
Double dg = Double.valueOf(gl);
|
||||||
|
if (dg> maxLikelihood)
|
||||||
|
maxLikelihood = dg;
|
||||||
|
|
||||||
|
likeArray.add(dg);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Double likeVal: likeArray)
|
||||||
|
beagleWriter.print(String.format("%5.4f ",Math.pow(10, likeVal-maxLikelihood)));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.
|
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue