Three important updates for Dindel genotyper:
a) Fixed the genotyper, which broke with a recent checkin to annotate the VCF with unfiltered depth. b) The printout of ref/alt alleles in the output VCF was incorrect because the start/stop positions of the associated GenomeLoc were incorrectly computed in the case of a deletion. c) Redid the Beagle input/output walkers so that they no longer assume that the ref is a single base or that the variant is a VCF, and generalized them to be indel-capable, so the Beagle walkers can now be used for indels as well. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4541 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
990677ec36
commit
cf9c9ae241
|
|
@ -152,7 +152,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
if (haplotypeList.size() > 0) {
|
if (haplotypeList.size() > 0) {
|
||||||
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
||||||
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
||||||
|
//System.out.format("%d %d\n",bestIdx, secondBestIdx);
|
||||||
// Temp hack to match old implementation's scaling, TBD better behavior
|
// Temp hack to match old implementation's scaling, TBD better behavior
|
||||||
|
|
||||||
return Arrays.asList(new Haplotype(haplotypeR.getBasesAsBytes(), 60), new Haplotype(haplotypeA.getBasesAsBytes(), contextSize));
|
return Arrays.asList(new Haplotype(haplotypeR.getBasesAsBytes(), 60), new Haplotype(haplotypeA.getBasesAsBytes(), contextSize));
|
||||||
|
|
|
||||||
|
|
@ -115,9 +115,9 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
GenomeLoc loc = context.getLocation();
|
GenomeLoc loc = context.getLocation();
|
||||||
VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, null, loc, false);
|
VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, null, loc, true);
|
||||||
|
|
||||||
VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, null, loc, false);
|
VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, null, loc, true);
|
||||||
|
|
||||||
if ( vc_input == null )
|
if ( vc_input == null )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -199,32 +199,38 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
ArrayList<String> beagleProbabilities = beagleProbsFeature.getProbLikelihoods().get(sample);
|
ArrayList<String> beagleProbabilities = beagleProbsFeature.getProbLikelihoods().get(sample);
|
||||||
ArrayList<String> beagleGenotypePairs = beaglePhasedFeature.getGenotypes().get(sample);
|
ArrayList<String> beagleGenotypePairs = beaglePhasedFeature.getGenotypes().get(sample);
|
||||||
|
|
||||||
|
// original alleles at this genotype
|
||||||
Allele originalAlleleA = g.getAllele(0);
|
Allele originalAlleleA = g.getAllele(0);
|
||||||
|
|
||||||
Allele originalAlleleB = (g.getAlleles().size() == 2) ? g.getAllele(1) : g.getAllele(0); // hack to deal with no-call genotypes
|
Allele originalAlleleB = (g.getAlleles().size() == 2) ? g.getAllele(1) : g.getAllele(0); // hack to deal with no-call genotypes
|
||||||
|
|
||||||
|
|
||||||
// We have phased genotype in hp. Need to set the isRef field in the allele.
|
// We have phased genotype in hp. Need to set the isRef field in the allele.
|
||||||
List<Allele> alleles = new ArrayList<Allele>();
|
List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
|
||||||
String alleleA = beagleGenotypePairs.get(0);
|
String alleleA = beagleGenotypePairs.get(0);
|
||||||
String alleleB = beagleGenotypePairs.get(1);
|
String alleleB = beagleGenotypePairs.get(1);
|
||||||
|
|
||||||
byte[] r = alleleA.getBytes();
|
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
|
||||||
|
String refString = vc_input.getReference().getDisplayString();
|
||||||
|
if (refString.length() == 0) // ref was null
|
||||||
|
refString = Allele.NULL_ALLELE_STRING;
|
||||||
|
|
||||||
//System.out.println(context.getLocation() + " : " + alleleA + " " + alleleB);
|
Allele bglAlleleA, bglAlleleB;
|
||||||
|
|
||||||
byte rA = r[0];
|
if (alleleA.matches(refString))
|
||||||
|
bglAlleleA = Allele.create(alleleA,true);
|
||||||
|
else
|
||||||
|
bglAlleleA = Allele.create(alleleA,false);
|
||||||
|
|
||||||
Boolean isRefA = (refByte == rA);
|
if (alleleB.matches(refString))
|
||||||
|
bglAlleleB = Allele.create(alleleB,true);
|
||||||
|
else
|
||||||
|
bglAlleleB = Allele.create(alleleB,false);
|
||||||
|
|
||||||
Allele refAllele = Allele.create(r, isRefA );
|
|
||||||
alleles.add(refAllele);
|
|
||||||
|
|
||||||
r = alleleB.getBytes();
|
alleles.add(bglAlleleA);
|
||||||
byte rB = r[0];
|
alleles.add(bglAlleleB);
|
||||||
|
|
||||||
Boolean isRefB = (refByte == rB);
|
|
||||||
Allele altAllele = Allele.create(r,isRefB);
|
|
||||||
alleles.add(altAllele);
|
|
||||||
|
|
||||||
// Compute new GQ field = -10*log10Pr(Genotype call is wrong)
|
// Compute new GQ field = -10*log10Pr(Genotype call is wrong)
|
||||||
// Beagle gives probability that genotype is AA, AB and BB.
|
// Beagle gives probability that genotype is AA, AB and BB.
|
||||||
|
|
@ -234,20 +240,22 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
Double hetProbability = Double.valueOf(beagleProbabilities.get(1));
|
Double hetProbability = Double.valueOf(beagleProbabilities.get(1));
|
||||||
Double homVarProbability = Double.valueOf(beagleProbabilities.get(2));
|
Double homVarProbability = Double.valueOf(beagleProbabilities.get(2));
|
||||||
|
|
||||||
if (isRefA && isRefB) // HomRef call
|
if (bglAlleleA.isReference() && bglAlleleB.isReference()) // HomRef call
|
||||||
probWrongGenotype = hetProbability + homVarProbability;
|
probWrongGenotype = hetProbability + homVarProbability;
|
||||||
else if ((isRefB && !isRefA) || (isRefA && !isRefB))
|
else if ((bglAlleleB.isReference() && bglAlleleA.isNonReference()) || (bglAlleleA.isReference() && bglAlleleB.isNonReference()))
|
||||||
probWrongGenotype = homRefProbability + homVarProbability;
|
probWrongGenotype = homRefProbability + homVarProbability;
|
||||||
else // HomVar call
|
else // HomVar call
|
||||||
probWrongGenotype = hetProbability + homRefProbability;
|
probWrongGenotype = hetProbability + homRefProbability;
|
||||||
|
|
||||||
|
// deal with numerical errors coming from limited formatting value on Beagle output files
|
||||||
|
if (probWrongGenotype > 1 - MIN_PROB_ERROR)
|
||||||
|
probWrongGenotype = 1 - MIN_PROB_ERROR;
|
||||||
|
|
||||||
if (1-probWrongGenotype < noCallThreshold) {
|
if (1-probWrongGenotype < noCallThreshold) {
|
||||||
// quality is bad: don't call genotype
|
// quality is bad: don't call genotype
|
||||||
alleles.clear();
|
alleles.clear();
|
||||||
refAllele = originalAlleleA;
|
alleles.add(originalAlleleA);
|
||||||
altAllele = originalAlleleB;
|
alleles.add(originalAlleleB);
|
||||||
alleles.add(refAllele);
|
|
||||||
alleles.add(altAllele);
|
|
||||||
genotypeIsPhased = false;
|
genotypeIsPhased = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -277,8 +285,8 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
og = a1+"/"+a2;
|
og = a1+"/"+a2;
|
||||||
|
|
||||||
// See if Beagle switched genotypes
|
// See if Beagle switched genotypes
|
||||||
if (!((refAllele.equals(originalAlleleA) && altAllele.equals(originalAlleleB) ||
|
if (!((bglAlleleA.equals(originalAlleleA) && bglAlleleB.equals(originalAlleleB) ||
|
||||||
(refAllele.equals(originalAlleleB) && altAllele.equals(originalAlleleA))))){
|
(bglAlleleA.equals(originalAlleleB) && bglAlleleB.equals(originalAlleleA))))){
|
||||||
originalAttributes.put("OG",og);
|
originalAttributes.put("OG",og);
|
||||||
numGenotypesChangedByBeagle++;
|
numGenotypesChangedByBeagle++;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -109,8 +109,8 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
VariantContext variant_eval;
|
VariantContext variant_eval;
|
||||||
VariantContext validation_eval;
|
VariantContext validation_eval;
|
||||||
|
|
||||||
variant_eval = tracker.getVariantContext(ref, ROD_NAME, null, loc, false);
|
variant_eval = tracker.getVariantContext(ref, ROD_NAME, null, loc, true);
|
||||||
validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME,null,loc,false);
|
validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME,null,loc, true);
|
||||||
if ( goodSite(variant_eval,validation_eval) ) {
|
if ( goodSite(variant_eval,validation_eval) ) {
|
||||||
if ( useValidation(variant_eval,validation_eval, ref) ) {
|
if ( useValidation(variant_eval,validation_eval, ref) ) {
|
||||||
writeBeagleOutput(validation_eval,variant_eval,true,validationPrior);
|
writeBeagleOutput(validation_eval,variant_eval,true,validationPrior);
|
||||||
|
|
@ -136,7 +136,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean goodSite(VariantContext v) {
|
public boolean goodSite(VariantContext v) {
|
||||||
return v != null && ! v.isFiltered() && v.isSNP() && v.isBiallelic() && v.hasGenotypes();
|
return v != null && ! v.isFiltered() && v.isBiallelic() && v.hasGenotypes();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean useValidation(VariantContext variant, VariantContext validation, ReferenceContext ref) {
|
public boolean useValidation(VariantContext variant, VariantContext validation, ReferenceContext ref) {
|
||||||
|
|
@ -165,7 +165,8 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
|
public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
|
||||||
beagleWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString()));
|
GenomeLoc currentLoc = VariantContextUtils.getLocation(preferredVC);
|
||||||
|
beagleWriter.print(String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart()));
|
||||||
if ( beagleGenotypesWriter != null ) {
|
if ( beagleGenotypesWriter != null ) {
|
||||||
beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString()));
|
beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString()));
|
||||||
}
|
}
|
||||||
|
|
@ -173,9 +174,9 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
for ( Allele allele : preferredVC.getAlleles() ) {
|
for ( Allele allele : preferredVC.getAlleles() ) {
|
||||||
String bglPrintString;
|
String bglPrintString;
|
||||||
if (allele.isNoCall() || allele.isNull())
|
if (allele.isNoCall() || allele.isNull())
|
||||||
bglPrintString = "0";
|
bglPrintString = "-";
|
||||||
else
|
else
|
||||||
bglPrintString = allele.toString().substring(0,1); // get rid of * in case of reference allele
|
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
|
||||||
|
|
||||||
beagleWriter.print(String.format("%s ", bglPrintString));
|
beagleWriter.print(String.format("%s ", bglPrintString));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broad.tribble.util.variantcontext.Allele;
|
||||||
import org.broad.tribble.util.variantcontext.Genotype;
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
|
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
|
||||||
import org.broad.tribble.vcf.VCFConstants;
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -224,7 +225,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
|
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||||
|
|
||||||
|
if (context.hasBasePileup())
|
||||||
|
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(context.getBasePileup()));
|
||||||
|
|
||||||
|
else if (context.hasExtendedEventPileup())
|
||||||
|
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(context.getExtendedEventPileup()));
|
||||||
|
|
||||||
double qual;
|
double qual;
|
||||||
double[] posteriors = GLs.get(sample).getPosteriors();
|
double[] posteriors = GLs.get(sample).getPosteriors();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -264,9 +264,23 @@ public class UnifiedGenotyperEngine {
|
||||||
GenomeLoc loc = refContext.getLocus();
|
GenomeLoc loc = refContext.getLocus();
|
||||||
|
|
||||||
// todo - temp fix until we can deal with extended events properly
|
// todo - temp fix until we can deal with extended events properly
|
||||||
|
long endLoc;
|
||||||
|
// for indels, stop location is one more than ref allele length
|
||||||
|
boolean isSNP = true;
|
||||||
|
for (Allele a:alleles){
|
||||||
|
if (a.getBaseString().length() != 1) {
|
||||||
|
isSNP = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isSNP)
|
||||||
|
endLoc = loc.getStart();
|
||||||
|
else
|
||||||
|
endLoc = loc.getStart() + refAllele.length();
|
||||||
|
|
||||||
//VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
|
//VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
|
||||||
VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(),
|
VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
|
||||||
(refAllele.length() > 0 ? loc.getStart()+refAllele.length()-1 : loc.getStart()),
|
|
||||||
alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
|
alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue