diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index ebdd3a17a..13ba2f58a 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -55,9 +55,27 @@ import java.util.Arrays; public class ValidateVariantsIntegrationTest extends WalkerTest { protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e"; + protected static final String defaultRegion = "1:10001292-10001303"; - public static String baseTestString(String file, String type) { - return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type; + + public static String baseTestString(final String file, String type) { + return baseTestString(file,type,defaultRegion,b36KGReference); + } + + public static String baseTestString(String file, String type, String region, String reference) { + final String typeArgString = type.startsWith("-") ? 
" --validationTypeToExclude " + type.substring(1) : excludeValidationTypesButString(type); + return "-T ValidateVariants -R " + reference + " -L " + region + " --variant:vcf " + privateTestDir + file + typeArgString; + } + + private static String excludeValidationTypesButString(String type) { + if (type == "ALL") + return ""; + final ValidateVariants.ValidationType vtype = ValidateVariants.ValidationType.valueOf(type); + final StringBuilder sbuilder = new StringBuilder(); + for (final ValidateVariants.ValidationType t : ValidateVariants.ValidationType.CONCRETE_TYPES) + if (t != vtype) + sbuilder.append(" --validationTypeToExclude " + t.toString()); + return sbuilder.toString(); } @Test @@ -117,12 +135,11 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { @Test public void testBadID() { - WalkerTestSpec spec = new WalkerTestSpec( + final WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129, 0, UserException.FailsStrictValidation.class ); - executeTest("test bad RS ID", spec); } @@ -151,7 +168,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { @Test public void testNoValidation() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString("validationExampleBad.vcf", "NONE"), + baseTestString("validationExampleBad.vcf", "-ALL"), 0, Arrays.asList(emptyMd5) ); @@ -169,4 +186,18 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { executeTest("test validating complex events", spec); } + + @Test(description = "Fixes '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164") + public void testUnusedAlleleFix() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("validationUnusedAllelesBugFix.vcf","-ALLELES","1:1-739000",b37KGReference),0,Arrays.asList(emptyMd5)); + executeTest("test unused allele bug fix", spec); + } + + @Test(description = "Checks '''bug''' reported in story 
https://www.pivotaltracker.com/story/show/68725164") + public void testUnusedAlleleError() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class); + executeTest("test unused allele bug fix", spec); + } } diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index d11cf5aee..a9a4cfd53 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -26,26 +26,26 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.help.HelpConstants; -import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; 
+import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFConstants; import java.io.File; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; +import java.util.*; /** @@ -53,19 +53,37 @@ import java.util.Set; * *
* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it. - * In addition to standard adherence to the VCF specification, this tool performs extra checks to make ensure - * the information contained within the file is correct. Checks include the correctness of the reference base(s), - * accuracy of AC & AN values, tests against rsIDs when a dbSNP file is provided, and that all alternate alleles - * are present in at least one sample. + * In addition to standard adherence to the VCF specification, this tool performs extra strict validations to ensure + * the information contained within the file is correct. These include: + *
+ *
--dbsnp as shown in the examples below.
+ * By default it will apply all the strict validations unless you indicate which ones you want to exclude
+ * using -Xtype|--validationTypeToExclude <code>, where code is one of the types listed above. You
+ * can exclude as many types as you want.
+ *
+ * You can exclude all strict validations with the special code ALL. In this case the tool will only
+ * test the adherence to the VCF specification.
+ *
- * A variant set to validate.
+ * A variant set to validate using -V or --variant as shown below.
*
To perform VCF format and all strict validations:
+ * ** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ @@ -74,6 +92,27 @@ import java.util.Set; * --dbsnp dbsnp.vcf ** + *
To perform only VCF format tests:
+ * + *+ * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -R ref.fasta \ + * -T ValidateVariants \ + * --validationTypeToExclude ALL \ + * --variant input.vcf + *+ * + *
To perform all validations except the strict ALLELES validation:
+ * + *+ * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -R ref.fasta \ + * -T ValidateVariants \ + * --validationTypeToExclude ALLELES + * --variant input.vcf \ + * --dbsnp dbsnp.vcf + *+ * */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=100)) @@ -86,11 +125,52 @@ public class ValidateVariants extends RodWalker
These are all types except {@link #ALL}.
+ */ + public final static Set