From 596c1723aeec43ebf2feb7ede387b1954910736c Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 25 Oct 2012 10:35:43 -0400 Subject: [PATCH] Hidden, unsupported ability of VariantEval to run AlleleCount stratification on sites-only VCFs. I'll expose it/add tests on it if people think this is generally useful. User needs to specify total # of samples as command line argument since genotypes are not available. Also, fixes to large-scale validation script: lower -minIndelFrac threshold or else we'll kill most indels since default 0.25 is too high for pools, fix also VE stratifications and add one VE run where eval=1KG, comp=pool data and AC stratification based on 1KG annotation --- .../sting/gatk/walkers/varianteval/VariantEval.java | 12 ++++++++++++ .../varianteval/stratifications/AlleleCount.java | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index a73e125ad..201028d99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -183,6 +183,10 @@ public class VariantEval extends RodWalker implements TreeRedu @Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false) private boolean keepSitesWithAC0 = false; + @Hidden + @Argument(fullName="numSamples", shortName="numSamples", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false) + private int numSamplesFromArgument = 0; + /** * If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying * variant set, and evaluate the union of the 
results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc. @@ -589,6 +593,14 @@ public class VariantEval extends RodWalker implements TreeRedu public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; } public Set getSampleNamesForEvaluation() { return sampleNamesForEvaluation; } + public int getNumberOfSamplesForEvaluation() { + if (sampleNamesForEvaluation!= null && !sampleNamesForEvaluation.isEmpty()) + return sampleNamesForEvaluation.size(); + else { + return numSamplesFromArgument; + } + + } public Set getSampleNamesForStratification() { return sampleNamesForStratification; } public List> getComps() { return comps; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index e6efd4482..7197fc14c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -29,7 +29,7 @@ public class AlleleCount extends VariantStratifier { // There are ploidy x n sample chromosomes // TODO -- generalize to handle multiple ploidy - nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * getVariantEvalWalker().getSamplePloidy(); + nchrom = getVariantEvalWalker().getNumberOfSamplesForEvaluation() * getVariantEvalWalker().getSamplePloidy(); if ( nchrom < 2 ) throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");