Multiallelic implementation of the TDT should be a pairwise list of values as per Mark Daly. Integration tests change because the count in the header is now A instead of 1.
This commit is contained in:
parent
f1990981fc
commit
f52f1f659f
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAn
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -58,41 +59,33 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
||||||
// return the descriptions used for the VCF INFO meta field
|
// return the descriptions used for the VCF INFO meta field
|
||||||
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
|
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
||||||
|
|
||||||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||||
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
|
private List<Double> calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
|
||||||
|
|
||||||
double nABGivenABandBB = 0.0;
|
List<Double> pairwiseTDTs = new ArrayList<Double>(10);
|
||||||
double nBBGivenABandBB = 0.0;
|
final int HomRefIndex = 0;
|
||||||
double nAAGivenABandAB = 0.0;
|
|
||||||
double nBBGivenABandAB = 0.0;
|
|
||||||
double nAAGivenAAandAB = 0.0;
|
|
||||||
double nABGivenAAandAB = 0.0;
|
|
||||||
|
|
||||||
// for each pair of alleles, add the likelihoods
|
// for each pair of alleles, add the likelihoods
|
||||||
int numAlleles = vc.getNAlleles();
|
int numAltAlleles = vc.getAlternateAlleles().size();
|
||||||
for ( int allele1 = 0; allele1 < numAlleles; allele1++ ) {
|
for ( int alt = 1; alt <= numAltAlleles; alt++ ) {
|
||||||
final int HOM1index = determineHomIndex(allele1, numAlleles);
|
final int HetIndex = alt;
|
||||||
|
final int HomVarIndex = determineHomIndex(alt, numAltAlleles+1);
|
||||||
|
|
||||||
for ( int allele2 = allele1 + 1; allele2 < numAlleles; allele2++ ) {
|
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HetIndex, HetIndex, HomVarIndex) + calculateNChildren(vc, triosToTest, HetIndex, HomVarIndex, HetIndex);
|
||||||
|
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HomVarIndex, HetIndex, HomVarIndex) + calculateNChildren(vc, triosToTest, HomVarIndex, HomVarIndex, HetIndex);
|
||||||
|
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, HomRefIndex, HetIndex, HetIndex);
|
||||||
|
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HomVarIndex, HetIndex, HetIndex);
|
||||||
|
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, HomRefIndex, HomRefIndex, HetIndex) + calculateNChildren(vc, triosToTest, HomRefIndex, HetIndex, HomRefIndex);
|
||||||
|
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HetIndex, HomRefIndex, HetIndex) + calculateNChildren(vc, triosToTest, HetIndex, HetIndex, HomRefIndex);
|
||||||
|
|
||||||
// TODO -- cache these for better performance
|
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
||||||
final int HETindex = HOM1index + (allele2 - allele1);
|
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
||||||
final int HOM2index = determineHomIndex(allele2, numAlleles);
|
pairwiseTDTs.add((numer * numer) / denom);
|
||||||
|
|
||||||
nABGivenABandBB += calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HETindex, HOM2index, HETindex);
|
|
||||||
nBBGivenABandBB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HOM2index, HOM2index, HETindex);
|
|
||||||
nAAGivenABandAB += calculateNChildren(vc, triosToTest, HOM1index, HETindex, HETindex);
|
|
||||||
nBBGivenABandAB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HETindex);
|
|
||||||
nAAGivenAAandAB += calculateNChildren(vc, triosToTest, HOM1index, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HOM1index, HETindex, HOM1index);
|
|
||||||
nABGivenAAandAB += calculateNChildren(vc, triosToTest, HETindex, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM1index);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
return pairwiseTDTs;
|
||||||
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
|
||||||
return (numer * numer) / denom;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
||||||
|
|
|
||||||
|
|
@ -179,7 +179,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTDTAnnotation() {
|
public void testTDTAnnotation() {
|
||||||
final String MD5 = "0aedd760e8099f0b95d53a41bdcd793e";
|
final String MD5 = "a78c1e950740d3c13c0258960c5fa8e1";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
|
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||||
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
|
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue