Three important updates for Dindel genotyper:
a) Fix it up because it broke with a recent checkin to annotate vcf with unfiltered depth. b) Printout of ref/alt alleles in output vcf was incorrect because the start/stop positions of associated GenomeLoc were incorrectly computed in case of a deletion. c) Redid Beagle input/output walkers as not assume that ref was a single base, not to assume that variant was a vcf and generalized it to be indel-capable, so now the Beagle walkers can be used for indels as well. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4541 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
990677ec36
commit
cf9c9ae241
|
|
@ -152,7 +152,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
if (haplotypeList.size() > 0) {
|
||||
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
||||
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
||||
|
||||
//System.out.format("%d %d\n",bestIdx, secondBestIdx);
|
||||
// Temp hack to match old implementation's scaling, TBD better behavior
|
||||
|
||||
return Arrays.asList(new Haplotype(haplotypeR.getBasesAsBytes(), 60), new Haplotype(haplotypeA.getBasesAsBytes(), contextSize));
|
||||
|
|
|
|||
|
|
@ -115,9 +115,9 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
|
||||
GenomeLoc loc = context.getLocation();
|
||||
VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, null, loc, false);
|
||||
VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, null, loc, true);
|
||||
|
||||
VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, null, loc, false);
|
||||
VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, null, loc, true);
|
||||
|
||||
if ( vc_input == null )
|
||||
return 0;
|
||||
|
|
@ -199,32 +199,38 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
ArrayList<String> beagleProbabilities = beagleProbsFeature.getProbLikelihoods().get(sample);
|
||||
ArrayList<String> beagleGenotypePairs = beaglePhasedFeature.getGenotypes().get(sample);
|
||||
|
||||
// original alleles at this genotype
|
||||
Allele originalAlleleA = g.getAllele(0);
|
||||
|
||||
Allele originalAlleleB = (g.getAlleles().size() == 2) ? g.getAllele(1) : g.getAllele(0); // hack to deal with no-call genotypes
|
||||
|
||||
|
||||
// We have phased genotype in hp. Need to set the isRef field in the allele.
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
|
||||
String alleleA = beagleGenotypePairs.get(0);
|
||||
String alleleB = beagleGenotypePairs.get(1);
|
||||
|
||||
byte[] r = alleleA.getBytes();
|
||||
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
|
||||
String refString = vc_input.getReference().getDisplayString();
|
||||
if (refString.length() == 0) // ref was null
|
||||
refString = Allele.NULL_ALLELE_STRING;
|
||||
|
||||
//System.out.println(context.getLocation() + " : " + alleleA + " " + alleleB);
|
||||
Allele bglAlleleA, bglAlleleB;
|
||||
|
||||
byte rA = r[0];
|
||||
if (alleleA.matches(refString))
|
||||
bglAlleleA = Allele.create(alleleA,true);
|
||||
else
|
||||
bglAlleleA = Allele.create(alleleA,false);
|
||||
|
||||
Boolean isRefA = (refByte == rA);
|
||||
if (alleleB.matches(refString))
|
||||
bglAlleleB = Allele.create(alleleB,true);
|
||||
else
|
||||
bglAlleleB = Allele.create(alleleB,false);
|
||||
|
||||
Allele refAllele = Allele.create(r, isRefA );
|
||||
alleles.add(refAllele);
|
||||
|
||||
r = alleleB.getBytes();
|
||||
byte rB = r[0];
|
||||
|
||||
Boolean isRefB = (refByte == rB);
|
||||
Allele altAllele = Allele.create(r,isRefB);
|
||||
alleles.add(altAllele);
|
||||
alleles.add(bglAlleleA);
|
||||
alleles.add(bglAlleleB);
|
||||
|
||||
// Compute new GQ field = -10*log10Pr(Genotype call is wrong)
|
||||
// Beagle gives probability that genotype is AA, AB and BB.
|
||||
|
|
@ -234,20 +240,22 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
Double hetProbability = Double.valueOf(beagleProbabilities.get(1));
|
||||
Double homVarProbability = Double.valueOf(beagleProbabilities.get(2));
|
||||
|
||||
if (isRefA && isRefB) // HomRef call
|
||||
if (bglAlleleA.isReference() && bglAlleleB.isReference()) // HomRef call
|
||||
probWrongGenotype = hetProbability + homVarProbability;
|
||||
else if ((isRefB && !isRefA) || (isRefA && !isRefB))
|
||||
else if ((bglAlleleB.isReference() && bglAlleleA.isNonReference()) || (bglAlleleA.isReference() && bglAlleleB.isNonReference()))
|
||||
probWrongGenotype = homRefProbability + homVarProbability;
|
||||
else // HomVar call
|
||||
probWrongGenotype = hetProbability + homRefProbability;
|
||||
|
||||
// deal with numerical errors coming from limited formatting value on Beagle output files
|
||||
if (probWrongGenotype > 1 - MIN_PROB_ERROR)
|
||||
probWrongGenotype = 1 - MIN_PROB_ERROR;
|
||||
|
||||
if (1-probWrongGenotype < noCallThreshold) {
|
||||
// quality is bad: don't call genotype
|
||||
alleles.clear();
|
||||
refAllele = originalAlleleA;
|
||||
altAllele = originalAlleleB;
|
||||
alleles.add(refAllele);
|
||||
alleles.add(altAllele);
|
||||
alleles.add(originalAlleleA);
|
||||
alleles.add(originalAlleleB);
|
||||
genotypeIsPhased = false;
|
||||
}
|
||||
|
||||
|
|
@ -277,8 +285,8 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
og = a1+"/"+a2;
|
||||
|
||||
// See if Beagle switched genotypes
|
||||
if (!((refAllele.equals(originalAlleleA) && altAllele.equals(originalAlleleB) ||
|
||||
(refAllele.equals(originalAlleleB) && altAllele.equals(originalAlleleA))))){
|
||||
if (!((bglAlleleA.equals(originalAlleleA) && bglAlleleB.equals(originalAlleleB) ||
|
||||
(bglAlleleA.equals(originalAlleleB) && bglAlleleB.equals(originalAlleleA))))){
|
||||
originalAttributes.put("OG",og);
|
||||
numGenotypesChangedByBeagle++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,8 +109,8 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
VariantContext variant_eval;
|
||||
VariantContext validation_eval;
|
||||
|
||||
variant_eval = tracker.getVariantContext(ref, ROD_NAME, null, loc, false);
|
||||
validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME,null,loc,false);
|
||||
variant_eval = tracker.getVariantContext(ref, ROD_NAME, null, loc, true);
|
||||
validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME,null,loc, true);
|
||||
if ( goodSite(variant_eval,validation_eval) ) {
|
||||
if ( useValidation(variant_eval,validation_eval, ref) ) {
|
||||
writeBeagleOutput(validation_eval,variant_eval,true,validationPrior);
|
||||
|
|
@ -136,7 +136,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public boolean goodSite(VariantContext v) {
|
||||
return v != null && ! v.isFiltered() && v.isSNP() && v.isBiallelic() && v.hasGenotypes();
|
||||
return v != null && ! v.isFiltered() && v.isBiallelic() && v.hasGenotypes();
|
||||
}
|
||||
|
||||
public boolean useValidation(VariantContext variant, VariantContext validation, ReferenceContext ref) {
|
||||
|
|
@ -165,7 +165,8 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
|
||||
beagleWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString()));
|
||||
GenomeLoc currentLoc = VariantContextUtils.getLocation(preferredVC);
|
||||
beagleWriter.print(String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart()));
|
||||
if ( beagleGenotypesWriter != null ) {
|
||||
beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString()));
|
||||
}
|
||||
|
|
@ -173,9 +174,9 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
for ( Allele allele : preferredVC.getAlleles() ) {
|
||||
String bglPrintString;
|
||||
if (allele.isNoCall() || allele.isNull())
|
||||
bglPrintString = "0";
|
||||
bglPrintString = "-";
|
||||
else
|
||||
bglPrintString = allele.toString().substring(0,1); // get rid of * in case of reference allele
|
||||
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
|
||||
|
||||
beagleWriter.print(String.format("%s ", bglPrintString));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broad.tribble.util.variantcontext.Allele;
|
|||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -224,7 +225,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
|
||||
if (context.hasBasePileup())
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(context.getBasePileup()));
|
||||
|
||||
else if (context.hasExtendedEventPileup())
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(context.getExtendedEventPileup()));
|
||||
|
||||
double qual;
|
||||
double[] posteriors = GLs.get(sample).getPosteriors();
|
||||
|
||||
|
|
|
|||
|
|
@ -264,9 +264,23 @@ public class UnifiedGenotyperEngine {
|
|||
GenomeLoc loc = refContext.getLocus();
|
||||
|
||||
// todo - temp fix until we can deal with extended events properly
|
||||
long endLoc;
|
||||
// for indels, stop location is one more than ref allele length
|
||||
boolean isSNP = true;
|
||||
for (Allele a:alleles){
|
||||
if (a.getBaseString().length() != 1) {
|
||||
isSNP = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isSNP)
|
||||
endLoc = loc.getStart();
|
||||
else
|
||||
endLoc = loc.getStart() + refAllele.length();
|
||||
|
||||
//VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
|
||||
VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(),
|
||||
(refAllele.length() > 0 ? loc.getStart()+refAllele.length()-1 : loc.getStart()),
|
||||
VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
|
||||
alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue