From f52f1f659f9c1d9a0eccc18830dd8e9c7e037cdc Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 10 Feb 2012 14:15:59 -0500 Subject: [PATCH] Multiallelic implementation of the TDT should be a pairwise list of values as per Mark Daly. Integration tests change because the count in the header is now A instead of 1. --- .../TransmissionDisequilibriumTest.java | 45 ++++++++----------- .../VariantAnnotatorIntegrationTest.java | 2 +- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index d84ba44bc..1f8ccf652 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAn import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -58,41 +59,33 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen // return the descriptions used for the VCF INFO meta field public List getKeyNames() { return Arrays.asList("TDT"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT - private double calculateTDT( final VariantContext vc, final Set triosToTest ) { + private List calculateTDT( final VariantContext vc, final Set triosToTest ) { - double nABGivenABandBB = 0.0; - double nBBGivenABandBB = 0.0; - double nAAGivenABandAB = 0.0; - double nBBGivenABandAB = 0.0; - double nAAGivenAAandAB = 0.0; - double nABGivenAAandAB = 0.0; + List pairwiseTDTs = new ArrayList(10); + final int HomRefIndex = 0; // for each pair of alleles, add the likelihoods - int numAlleles = vc.getNAlleles(); - for ( int allele1 = 0; allele1 < numAlleles; allele1++ ) { - final int HOM1index = determineHomIndex(allele1, numAlleles); + int numAltAlleles = vc.getAlternateAlleles().size(); + for ( int alt = 1; alt <= numAltAlleles; alt++ ) { + final int HetIndex = alt; + final int HomVarIndex = determineHomIndex(alt, numAltAlleles+1); - for ( int allele2 = allele1 + 1; allele2 < numAlleles; allele2++ ) { + final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HetIndex, HetIndex, HomVarIndex) + calculateNChildren(vc, triosToTest, HetIndex, HomVarIndex, HetIndex); + final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HomVarIndex, HetIndex, HomVarIndex) + calculateNChildren(vc, triosToTest, HomVarIndex, HomVarIndex, HetIndex); + final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, HomRefIndex, HetIndex, HetIndex); + final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HomVarIndex, HetIndex, HetIndex); + final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, HomRefIndex, HomRefIndex, HetIndex) + calculateNChildren(vc, triosToTest, HomRefIndex, HetIndex, HomRefIndex); + final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HetIndex, HomRefIndex, HetIndex) + calculateNChildren(vc, triosToTest, HetIndex, HetIndex, HomRefIndex); - // TODO -- cache these for better performance - final int HETindex = HOM1index + (allele2 - allele1); - final int HOM2index = determineHomIndex(allele2, numAlleles); - - nABGivenABandBB += calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HETindex, HOM2index, HETindex); - nBBGivenABandBB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HOM2index, HOM2index, HETindex); - nAAGivenABandAB += calculateNChildren(vc, triosToTest, HOM1index, HETindex, HETindex); - nBBGivenABandAB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HETindex); - nAAGivenAAandAB += calculateNChildren(vc, triosToTest, HOM1index, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HOM1index, HETindex, HOM1index); - nABGivenAAandAB += calculateNChildren(vc, triosToTest, HETindex, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM1index); - } + final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); + final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); + pairwiseTDTs.add((numer * numer) / denom); } - final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); - final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); - return (numer * numer) / denom; + return pairwiseTDTs; } private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 0d9d9bcd8..7984a00c0 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -179,7 +179,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testTDTAnnotation() { - final String MD5 = "0aedd760e8099f0b95d53a41bdcd793e"; + final String MD5 = "a78c1e950740d3c13c0258960c5fa8e1"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,