From 3069a689fe7dcca14200a6ed1ba1fdff3a31dbf8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Dec 2011 10:04:33 -0500 Subject: [PATCH 2/4] Bug fix: if there are multiple records at a given position, it turns out that SelectVariants would drop all variants that follow after one that fails filters (instead of dropping just the failing one). Added an integration test to cover this case. --- .../walkers/variantutils/SelectVariants.java | 20 +++++++++++-------- .../SelectVariantsIntegrationTest.java | 13 ++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 0af351750..6d94ffe6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -493,12 +493,12 @@ public class SelectVariants extends RodWalker implements TreeR if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) - return 0; + continue; } if (CONCORDANCE_ONLY) { Collection compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) - return 0; + continue; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) @@ -512,16 +512,20 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, samples); if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { + boolean failedJexlMatch = false; for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { if ( !VariantContextUtils.match(sub, jexl) ) { - return 0; + failedJexlMatch = true; + break; } } - if (SELECT_RANDOM_NUMBER) { - randomlyAddVariant(++variantNumber, sub); - } - else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { - vcfWriter.add(sub); + if ( !failedJexlMatch ) { + if (SELECT_RANDOM_NUMBER) { + randomlyAddVariant(++variantNumber, sub); + } + else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { + vcfWriter.add(sub); + } } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 72a07bd0e..042de2a27 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testUsingDbsnpName--" + testFile, spec); } + @Test + public void testMultipleRecordsAtOnePosition() { + String testFile = validationDataLocation + "selectVariants.onePosition.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("20b52c96f5c48258494d072752b53693") + ); + + executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec); + } + @Test public void testParallelization() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; From 5fd19ae734b8cc94ee276c8909b02b2e47c787a1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Dec 2011 10:19:00 -0500 Subject: [PATCH 3/4] Commented exactly how the results are represented from the exact model so developers can know how to use them. --- .../genotyper/AlleleFrequencyCalculationResult.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java index cc72fde11..ed80dce0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java @@ -34,10 +34,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; */ public class AlleleFrequencyCalculationResult { - // note that the cell at position zero in the likelihoods/posteriors array is actually probability of non-ref (since it's marginalized over all alleles) + // IMPORTANT NOTE: + // These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N). + // For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that + // the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may + // be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0). + // In the bi-allelic case (where there are no other alternate alleles over which to marginalize), + // the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED. final double[][] log10AlleleFrequencyLikelihoods; final double[][] log10AlleleFrequencyPosteriors; + // These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles) double log10LikelihoodOfAFzero = 0.0; double log10PosteriorOfAFzero = 0.0; From 16cc2b864e3098e96eaff5727a5583775e392442 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 19 Dec 2011 13:41:47 +0100 Subject: [PATCH 4/4] - Corrected bug causing cases where both parents are HET to be accounted twice in the TDT calculation - Adapted TDT Integration test to corrected version of TDT Signed-off-by: Ryan Poplin --- .../annotator/TransmissionDisequilibriumTest.java | 15 +++++++-------- .../VariantAnnotatorIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index 6cc8923e8..ecdde1e4f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT private double calculateTDT( final VariantContext vc, final Set triosToTest ) { - final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM); - final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM); + final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET); + final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET); final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET); final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET); - final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET); - final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF); + final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); return (numer * numer) / denom; } - private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { - final double likelihoodVector[] = new double[triosToTest.size() * 2]; + private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) { + final double likelihoodVector[] = new double[triosToTest.size()]; int iii = 0; for( final Sample child : triosToTest ) { final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); - likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; - likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; + likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx]; } return MathUtils.sumLog10(likelihoodVector); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index ffb9aedcc..245fda3c7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testTDTAnnotation() { - final String MD5 = "9fe37b61aab695ad47ce3c587148e91f"; + final String MD5 = "204e67536a17af7eaa6bf0a910818997"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,