From 110298322cbb1ed8cb1e191b5d00f3d145628a9a Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 29 Nov 2011 09:29:18 -0500 Subject: [PATCH] Adding Transmission Disequilibrium Test annotation to VariantAnnotator and integration test to test it. --- .../TransmissionDisequilibriumTest.java | 102 ++++++++++++++++++ .../walkers/annotator/VariantAnnotator.java | 6 ++ .../sting/utils/MendelianViolation.java | 2 +- .../VariantAnnotatorIntegrationTest.java | 11 ++ 4 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java new file mode 100644 index 000000000..3de179365 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -0,0 +1,102 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.FileNotFoundException; +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: 11/14/11 + */ + +public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation { + + private Set fullMVSet = null; + private final static int REF = 0; + private final static int HET = 1; + private final static int HOM = 2; + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( fullMVSet == null ) { + fullMVSet = new HashSet(); + + if ( walker instanceof VariantAnnotator ) { + final Map> families = ((VariantAnnotator) walker).getSampleDB().getFamilies(); + for( final Set family : families.values() ) { + for( final Sample sample : family ) { + if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now + fullMVSet.add( new MendelianViolation(sample, 0.0) ); + } + } + } + } else { + throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in."); + } + } + + final Map toRet = new HashMap(1); + final HashSet mvsToTest = new HashSet(); + + for( final MendelianViolation mv : fullMVSet ) { + final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() && + vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() && + vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods(); + if ( hasAppropriateGenotypes ) { + mvsToTest.add(mv); + } + } + + toRet.put("TDT", calculateTDT( vc, mvsToTest )); + + return toRet; + } + + // return the descriptions used for the VCF INFO meta field + public List getKeyNames() { return Arrays.asList("TDT"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } + + // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT + private double calculateTDT( final VariantContext vc, final Set mvsToTest ) { + + final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM); + final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM); + final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET); + final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET); + final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET); + + final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); + final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); + return (numer * numer) / denom; + } + + private double calculateNChildren( final VariantContext vc, final Set mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) { + final double likelihoodVector[] = new double[mvsToTest.size() * 2]; + int iii = 0; + for( final MendelianViolation mv : mvsToTest ) { + final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector(); + final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector(); + final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector(); + likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; + likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; + } + + return MathUtils.sumLog10(likelihoodVector); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index c9ea7a3b5..143f2eb2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -196,6 +197,11 @@ public class VariantAnnotator extends RodWalker implements Ann System.exit(0); } + @Override + public SampleDB getSampleDB() { + return super.getSampleDB(); + } + /** * Prepare the output file and the list of available features. */ diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index cf45dab79..d0579fc25 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -77,7 +77,7 @@ public class MendelianViolation { /** * An alternative to the more general constructor if you want to get the Sample information from the engine yourself. * @param sample - the sample object extracted from the sample metadata YAML file given to the engine. - * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation + * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation */ public MendelianViolation(Sample sample, double minGenotypeQualityP) { sampleMom = sample.getMother().getID(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 1824789a9..ffb9aedcc 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -168,4 +168,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { ); executeTest("Testing SnpEff annotations (unsupported version)", spec); } + + @Test + public void testTDTAnnotation() { + final String MD5 = "9fe37b61aab695ad47ce3c587148e91f"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1, + Arrays.asList(MD5)); + executeTest("Testing TDT annotation", spec); + } + }