diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 0e12ae937..13ba2f58a 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -63,10 +63,21 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { } public static String baseTestString(String file, String type, String region, String reference) { - final String typeArgString = type.startsWith("-") ? " --validationTypeToExclude " + type.substring(1) : " --validationType " + type; + final String typeArgString = type.startsWith("-") ? " --validationTypeToExclude " + type.substring(1) : excludeValidationTypesButString(type); return "-T ValidateVariants -R " + reference + " -L " + region + " --variant:vcf " + privateTestDir + file + typeArgString; } + private static String excludeValidationTypesButString(String type) { + if (type == "ALL") + return ""; + final ValidateVariants.ValidationType vtype = ValidateVariants.ValidationType.valueOf(type); + final StringBuilder sbuilder = new StringBuilder(); + for (final ValidateVariants.ValidationType t : ValidateVariants.ValidationType.CONCRETE_TYPES) + if (t != vtype) + sbuilder.append(" --validationTypeToExclude " + t.toString()); + return sbuilder.toString(); + } + @Test public void testGoodFile() { WalkerTestSpec spec = new WalkerTestSpec( @@ -124,12 +135,11 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { @Test public void testBadID() { - WalkerTestSpec spec = new WalkerTestSpec( + final WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129, 0, UserException.FailsStrictValidation.class ); - executeTest("test bad RS ID", spec); } @@ -158,7 +168,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { @Test public void testNoValidation() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString("validationExampleBad.vcf", "NONE"), + baseTestString("validationExampleBad.vcf", "-ALL"), 0, Arrays.asList(emptyMd5) ); @@ -187,7 +197,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { @Test(description = "Checks '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164") public void testUnusedAlleleError() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString("validationUnusedAllelesBugFix.vcf","ALL","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class); + baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class); executeTest("test unused allele bug fix", spec); } } diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 59e9c8723..a9a4cfd53 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -53,43 +53,37 @@ import java.util.*; * *

* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it. - * In addition to standard adherence to the VCF specification, this tool performs extra checks to make ensure - * the information contained within the file is correct. Checks include: - * p> - *

+ * In addition to standard adherence to the VCF specification, this tool performs extra strict validations to ensure + * the information contained within the file is correct. These include: + *

+ *

+ *
REF
the correctness of the reference base(s).
+ *
CHR_COUNTS
accuracy of AC & AN values.
+ *
IDS
tests against rsIDs when a dbSNP file is provided. Notice that for this one to work, you need + * to provide a reference to the dbsnp variant containing file using the --dbsnp as show in examples below.
+ *
ALLELES
and that all alternate alleles are present in at least one sample.
+ *
* *

* *

- * By default it will apply all the strict validations unless you indicate which one you want to perform - * using --validationType as indicated above. - * You can request this explicitly using --validationType ALL but this is not necessary. - *

- * + * By default it will apply all the strict validations unless you indicate which one you want you want to exclude + * using -Xtype|--validationTypeToExclude <code>, where code is one of the listed above. You + * can exclude as many types as you want *

- * If you are looking simply to test the adherence to the VCF specification, use --validationType NONE. - *

- * - *

- * If you want to perform all strict validations but a few you can list the ones to exclude adding - * as many '--validationTypeToExclude TYPE_ID' to the command line as needed. - * This is only possible when the validation type is the default 'ALL'. - * An attempt to combine --validationTypeToExclude with - * any other validation type will result in an error. + * Yo can exclude all strict validations with the special code ALL. In this case the tool will only + * test the adherence to the VCF specification. *

* *

Input

*

- * A variant set to validate. + * A variant set to validate using -V or --variant as shown below. *

* *

Examples

+ * + *

To perform VCF format and all strict validations:

+ * *
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
@@ -98,6 +92,27 @@ import java.util.*;
  *   --dbsnp dbsnp.vcf
  * 
* + *

To perform only VCF format tests:

+ * + *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T ValidateVariants \
+ *   --validationTypeToExclude ALL \
+ *   --variant input.vcf
+ * 
+ * + *

To perform all validations except the strict ALLELE validation:

+ * + *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T ValidateVariants \
+ *   --validationTypeToExclude ALLELES
+ *   --variant input.vcf \
+ *   --dbsnp dbsnp.vcf
+ * 
+ * */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=100)) @@ -110,8 +125,9 @@ public class ValidateVariants extends RodWalker { protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); public enum ValidationType { + /** - * Perform all extra-strict tests listed bellow. + * Makes reference to all extra-strict tests listed below. */ ALL, @@ -135,18 +151,23 @@ public class ValidateVariants extends RodWalker { * Check that the AN and AC annotations are consistent with the number of calls, alleles and then number these * are called across samples. */ - CHR_COUNTS, + CHR_COUNTS; /** - * Do not perform any extra-strict VCF content validation. Notice however that mis-formatted VCF would be reported - * as an error (e.g. garbled VCF file, mismatch between format fields and genotype fields number, wrong data types, - * or reference to non-existing alternative alleles. + * Unmodifiable set of concrete validation types. + * + *

These are all types except {@link #ALL}.

*/ - NONE - } + public final static Set CONCRETE_TYPES; - @Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false) - protected ValidationType type = ValidationType.ALL; + static { + final Set cts = new LinkedHashSet<>(values().length - 1); + for (final ValidationType v : values()) + if (v != ALL) + cts.add(v); + CONCRETE_TYPES = Collections.unmodifiableSet(cts); + } + } @Argument(fullName = "validationTypeToExclude", shortName = "Xtype", doc = "which validation type to exclude from a full strict validation", required = false) protected List excludeTypes = new ArrayList<>(); @@ -164,8 +185,14 @@ public class ValidateVariants extends RodWalker { private File file = null; + /** + * Contains final set of validation to apply. + */ + private Collection validationTypes; + public void initialize() { file = new File(variantCollection.variants.getSource()); + validationTypes = calculateValidationTypesToApply(excludeTypes); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -215,7 +242,7 @@ public class ValidateVariants extends RodWalker { } try { - for (final ValidationType t : calculateValidationTypesToApply(type,excludeTypes)) + for (final ValidationType t : validationTypes) applyValidationType(vc, reportedRefAllele, observedRefAllele, rsIDs, t); } catch (TribbleException e) { if ( WARN_ON_ERROR ) { @@ -229,34 +256,27 @@ public class ValidateVariants extends RodWalker { /** * Given the validation type and exclusion type, calculate the final set of type to validate. - * @param type the validation type. - * @param excludeType types to exclude. + * @param excludeTypes types to exclude. * * @throws UserException.BadArgumentValue if the user combines any validation type except 'ALL' and some exclude types. * * @return never {@code null} but perhaps an empty set. */ - private Collection calculateValidationTypesToApply(final ValidationType type, final List excludeType) { - final Set excludeTypeSet = new LinkedHashSet<>(excludeType); - if (type == ValidationType.ALL) { - if (excludeTypeSet.size() == 0) - return Collections.singleton(ValidationType.ALL); - else { - if (excludeTypeSet.remove(ValidationType.NONE)) - logger.warn("found NONE in the --validationTypeToExclude list; does not have any effect"); - if (excludeTypeSet.contains(ValidationType.ALL)) - logger.warn("found ALL in the --validationTypeToExclude list. Perhaps you want to simply use --validationType NONE instead?"); - - final Set result = new LinkedHashSet<>(Arrays.asList(ValidationType.values())); - result.remove(ValidationType.ALL); - result.remove(ValidationType.NONE); - result.removeAll(excludeTypeSet); - return result; - } - } else if (excludeTypeSet.size() != 0) - throw new UserException.BadArgumentValue("--validationTypeToExclude","you can use --validationTypeToExclude ONLY in combination with the default --validationType ALL"); - else - return Collections.singleton(type); + private Collection calculateValidationTypesToApply(final List excludeTypes) { + if (excludeTypes.size() == 0) + return Collections.singleton(ValidationType.ALL); + final Set excludeTypeSet = new LinkedHashSet<>(excludeTypes); + if (excludeTypes.size() != excludeTypeSet.size()) + logger.warn("found repeat redundant validation types listed using the --validationTypeToExclude argument"); + if (excludeTypeSet.contains(ValidationType.ALL)) { + if (excludeTypeSet.size() > 1) + logger.warn("found ALL in the --validationTypeToExclude list together with other concrete type exclusions that are redundant"); + return Collections.emptyList(); + } else { + final Set result = new LinkedHashSet<>(ValidationType.CONCRETE_TYPES); + result.removeAll(excludeTypeSet); + return result; + } } private void applyValidationType(VariantContext vc, Allele reportedRefAllele, Allele observedRefAllele, Set rsIDs, ValidationType t) {