Simple change to handle a no-call (must avoid asking for the second allele, which will be be null in this case). Also, added a hack to deal with input VCFs where there are no genotype likelihoods (needed in order to process Hapmap and 1KG VCFs). In this mode, called genotypes are assigned a likelihood of 0.96, and alternative genotypes are given 0.02 each. I know Beagle actually takes genotype data without likelihoods, so this might not be the right way to do this.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4081 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dec713a184
commit
295472bf69
|
|
@ -204,7 +204,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
ArrayList<String> beagleGenotypePairs = beaglePhasedFeature.getGenotypes().get(sample);
|
ArrayList<String> beagleGenotypePairs = beaglePhasedFeature.getGenotypes().get(sample);
|
||||||
|
|
||||||
Allele originalAlleleA = g.getAllele(0);
|
Allele originalAlleleA = g.getAllele(0);
|
||||||
Allele originalAlleleB = g.getAllele(1);
|
Allele originalAlleleB = (g.getAlleles().size() == 2) ? g.getAllele(1) : g.getAllele(0); // hack to deal with no-call genotypes
|
||||||
|
|
||||||
// We have phased genotype in hp. Need to set the isRef field in the allele.
|
// We have phased genotype in hp. Need to set the isRef field in the allele.
|
||||||
List<Allele> alleles = new ArrayList<Allele>();
|
List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
|
@ -213,6 +213,9 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
String alleleB = beagleGenotypePairs.get(1);
|
String alleleB = beagleGenotypePairs.get(1);
|
||||||
|
|
||||||
byte[] r = alleleA.getBytes();
|
byte[] r = alleleA.getBytes();
|
||||||
|
|
||||||
|
//System.out.println(context.getLocation() + " : " + alleleA + " " + alleleB);
|
||||||
|
|
||||||
byte rA = r[0];
|
byte rA = r[0];
|
||||||
|
|
||||||
Boolean isRefA = (refByte == rA);
|
Boolean isRefA = (refByte == rA);
|
||||||
|
|
|
||||||
|
|
@ -148,6 +148,26 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
beagleGenotypesWriter.format("%c %c ", a, b);
|
beagleGenotypesWriter.format("%c %c ", a, b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (genotype.isCalled() && !genotype.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||||
|
// hack to deal with input VCFs with no genotype likelihoods. Just assume the called genotype
|
||||||
|
// is confident. This is useful for Hapmap and 1KG release VCFs.
|
||||||
|
double AA = 0.02;
|
||||||
|
double AB = 0.02;
|
||||||
|
double BB = 0.02;
|
||||||
|
|
||||||
|
if (genotype.isHomRef()) { AA = 0.96; }
|
||||||
|
else if (genotype.isHet()) { AB = 0.96; }
|
||||||
|
else if (genotype.isHomVar()) { BB = 0.96; }
|
||||||
|
|
||||||
|
beagleWriter.printf("%.2f %.2f %.2f ", AA, AB, BB);
|
||||||
|
|
||||||
|
if (beagleGenotypesWriter != null) {
|
||||||
|
char a = genotype.getAllele(0).toString().charAt(0);
|
||||||
|
char b = genotype.getAllele(0).toString().charAt(0);
|
||||||
|
|
||||||
|
beagleGenotypesWriter.format("%c %c ", a, b);
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.
|
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.
|
||||||
if (beagleGenotypesWriter != null)
|
if (beagleGenotypesWriter != null)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue