A brief implementation of a QD calculation that is not quite so bimodal for known variants (multiplicatively penalizes QD by (n variant samples)/(n variant alleles) ). Not sure how helpful this will be (which is why it is in oneoffs). Seems nice on MCKD1, but I'm still playing with the optimization.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4024 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c6a8fba922
commit
49a3db9dfe
|
|
@ -0,0 +1,64 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.AnnotationByDepth;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A Qual By Depth calculation adjusted to account for allele counts by (n var samples)/(n var alleles) -- so an entirely
|
||||
* homozygous variant receives a penalty of 1/2, while entirely het receives a (multiplicative) penalty of 1 (so no penalty)
|
||||
* This does not necessarily work well in the case of non-confident genotypes (could over or under penalize)
|
||||
*/
|
||||
public class QualByDepthV2 extends AnnotationByDepth implements ExperimentalAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
//double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts);
|
||||
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||
if ( qDepth == 0 )
|
||||
return null;
|
||||
|
||||
double QbyD = hetHomAdjustment(vc) * 10.0 * vc.getNegLog10PError() / (double)qDepth;
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QbyD));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD2"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
|
||||
|
||||
public double hetHomAdjustment(VariantContext vc) {
|
||||
int variantSamples = 0;
|
||||
int variantAlleles = 0;
|
||||
for ( Genotype g : vc.getGenotypesSortedByName() ) {
|
||||
if ( ! g.isFiltered() ) {
|
||||
if ( g.isHet() ) {
|
||||
variantSamples++;
|
||||
variantAlleles++;
|
||||
} else if ( g.isHomVar() ) {
|
||||
variantSamples++;
|
||||
variantAlleles += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (0.0+variantSamples)/(0.0+variantAlleles);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue