A brief implementation of a QD calculation that is not quite so bimodal for known variants (multiplicatively penalizes QD by (n variant samples)/(n variant alleles) ). Not sure how helpful this will be (which is why it is in oneoffs). Seems nice on MCKD1, but I'm still playing with the optimization.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4024 348d0f76-0448-11de-a6fe-93d51630548a
2010-08-13 15:42:37 +00:00 · 2010-08-13 15:42:37 +00:00 · 49a3db9dfe
parent c6a8fba922
commit 49a3db9dfe
1 changed files with 64 additions and 0 deletions
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/QualByDepthV2.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/QualByDepthV2.java
@ -0,0 +1,64 @@
+package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
+
+import org.broad.tribble.util.variantcontext.Genotype;
+import org.broad.tribble.util.variantcontext.VariantContext;
+import org.broad.tribble.vcf.VCFHeaderLineType;
+import org.broad.tribble.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.annotator.AnnotationByDepth;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A Qual By Depth calculation adjusted to account for allele counts by (n var samples)/(n var alleles) -- so an entirely
+ * homozygous variant receives a penalty of 1/2, while entirely het receives a (multiplicative) penalty of 1 (so no penalty)
+ * This does not necessarily work well in the case of non-confident genotypes (could over or under penalize)
+ */
+public class QualByDepthV2 extends AnnotationByDepth implements ExperimentalAnnotation {
+    public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
+        if ( stratifiedContexts.size() == 0 )
+            return null;
+
+        final Map<String, Genotype> genotypes = vc.getGenotypes();
+        if ( genotypes == null || genotypes.size() == 0 )
+            return null;
+
+        //double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts);
+        int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
+        if ( qDepth == 0 )
+            return null;
+
+        double QbyD = hetHomAdjustment(vc) * 10.0 * vc.getNegLog10PError() / (double)qDepth;
+        Map<String, Object> map = new HashMap<String, Object>();
+        map.put(getKeyNames().get(0), String.format("%.2f", QbyD));
+        return map;
+    }
+
+    public List<String> getKeyNames() { return Arrays.asList("QD2"); }
+
+    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
+
+    public double hetHomAdjustment(VariantContext vc) {
+        int variantSamples = 0;
+        int variantAlleles = 0;
+        for ( Genotype g : vc.getGenotypesSortedByName() ) {
+            if ( ! g.isFiltered() ) {
+                if ( g.isHet() ) {
+                    variantSamples++;
+                    variantAlleles++;
+                } else if ( g.isHomVar() ) {
+                    variantSamples++;
+                    variantAlleles += 2;
+                }
+            }
+        }
+
+        return (0.0+variantSamples)/(0.0+variantAlleles);
+    }
+}