From 49a3db9dfe2bc18d9a5a4abd3023f6fa6e5144aa Mon Sep 17 00:00:00 2001 From: chartl Date: Fri, 13 Aug 2010 15:42:37 +0000 Subject: [PATCH] A brief implementation of a QD calculation that is not quite so bimodal for known variants (multiplicatively penalizes QD by (n variant samples)/(n variant alleles) ). Not sure how helpful this will be (which is why it is in oneoffs). Seems nice on MCKD1, but I'm still playing with the optimization. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4024 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/QualByDepthV2.java | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/QualByDepthV2.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/QualByDepthV2.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/QualByDepthV2.java new file mode 100755 index 000000000..8de8ee5f8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/QualByDepthV2.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.annotator; + +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeaderLineType; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.AnnotationByDepth; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A Qual By Depth calculation adjusted to account for allele counts by (n var samples)/(n var alleles) -- so an entirely + * homozygous variant receives a penalty of 1/2, while entirely het receives a (multiplicative) penalty of 1 (so no penalty) + * This does not necessarily work well in the case of non-confident genotypes (could over or under penalize) + */ +public class QualByDepthV2 extends AnnotationByDepth implements ExperimentalAnnotation { + public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( stratifiedContexts.size() == 0 ) + return null; + + final Map genotypes = vc.getGenotypes(); + if ( genotypes == null || genotypes.size() == 0 ) + return null; + + //double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts); + int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts); + if ( qDepth == 0 ) + return null; + + double QbyD = hetHomAdjustment(vc) * 10.0 * vc.getNegLog10PError() / (double)qDepth; + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%.2f", QbyD)); + return map; + } + + public List getKeyNames() { return Arrays.asList("QD2"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); } + + public double hetHomAdjustment(VariantContext vc) { + int variantSamples = 0; + int variantAlleles = 0; + for ( Genotype g : vc.getGenotypesSortedByName() ) { + if ( ! g.isFiltered() ) { + if ( g.isHet() ) { + variantSamples++; + variantAlleles++; + } else if ( g.isHomVar() ) { + variantSamples++; + variantAlleles += 2; + } + } + } + + return (0.0+variantSamples)/(0.0+variantAlleles); + } +}