From b6a1967f12a92daad681916a238699c0ab4b4b7e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 29 Oct 2012 21:47:09 -0400 Subject: [PATCH] Better documentation for ValidateVariants so that people realize it's used for strict validation of the VCF file. Added an option to turn off strict validation and an integration test to cover it. --- .../variantutils/ValidateVariants.java | 15 +++++---- .../ValidateVariantsIntegrationTest.java | 31 +++++++++++++------ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 2f4d24312..3e6ab050a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -46,16 +46,20 @@ import java.util.Set; /** - * Strictly validates a variants file. + * Validates a VCF file with an extra strict set of criteria. * *

* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it. - * Checks include the correctness of the reference base(s), accuracy of AC & AN values, tests against rsIDs - * when a dbSNP file is provided, and that all alternate alleles are present in at least one sample. + * In addition to standard adherence to the VCF specification, this tool performs extra checks to ensure + * the information contained within the file is correct. Checks include the correctness of the reference base(s), + * accuracy of AC & AN values, tests against rsIDs when a dbSNP file is provided, and that all alternate alleles + * are present in at least one sample. + * + * If you are looking simply to test the adherence to the VCF specification, use --validationType NONE. * *

Input

*

- * A variant set to filter. + * A variant set to validate. *

* *

Examples

@@ -79,10 +83,9 @@ public class ValidateVariants extends RodWalker { protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); public enum ValidationType { - ALL, REF, IDS, ALLELES, CHR_COUNTS + ALL, REF, IDS, ALLELES, CHR_COUNTS, NONE } - @Hidden @Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false) protected ValidationType type = ValidationType.ALL; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 6a3d755d7..67d47997b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -33,6 +33,8 @@ import java.util.Arrays; public class ValidateVariantsIntegrationTest extends WalkerTest { + protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e"; + public static String baseTestString(String file, String type) { return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type; } @@ -42,7 +44,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleGood.vcf", "ALL"), 0, - Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") + Arrays.asList(emptyMd5) ); executeTest("test good file", spec); @@ -53,7 +55,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "REF"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad ref base #1", spec); @@ -64,7 +66,7 @@ public class ValidateVariantsIntegrationTest extends 
WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad2.vcf", "REF"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad ref base #2", spec); @@ -75,7 +77,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "CHR_COUNTS"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad chr counts #1", spec); @@ -86,7 +88,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad2.vcf", "CHR_COUNTS"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad chr counts #2", spec); @@ -97,7 +99,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129, 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad RS ID", spec); @@ -108,7 +110,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "ALLELES"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad alt allele", spec); @@ -119,18 +121,29 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad3.vcf", "REF"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad ref allele in deletion", spec); } + @Test + public void testNoValidation() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("validationExampleBad.vcf", "NONE"), + 0, + 
Arrays.asList(emptyMd5) + ); + + executeTest("test no validation", spec); + } + @Test public void testComplexEvents() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("complexEvents.vcf", "ALL"), 0, - Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") + Arrays.asList(emptyMd5) ); executeTest("test validating complex events", spec);