Change in the command line interface of ValidateVariants.

Following reviewers comments the command line interface has been simplified.
All extra strict validations are performed by default (as before) and the
user has to indicate which one he/she does not want to use with --validationTypeToExclude.

Before he/she was able to indicate the only ones to apply with --validationType but that has been scrapped out.

Stories:

    - https://www.pivotaltracker.com/story/show/68725164

Changes:

    - Removed validateType argument.
    - Improved documentation.
    - Added some warnning log message on suspicious argument combinations.

Tests:

    - ValidateVariantsIntegrationTest#*
This commit is contained in:
Valentin Ruano-Rubio 2014-04-07 12:22:19 -04:00
parent 18deeec6b0
commit 5afcc8e05f
2 changed files with 93 additions and 63 deletions

View File

@ -63,10 +63,21 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
} }
public static String baseTestString(String file, String type, String region, String reference) { public static String baseTestString(String file, String type, String region, String reference) {
final String typeArgString = type.startsWith("-") ? " --validationTypeToExclude " + type.substring(1) : " --validationType " + type; final String typeArgString = type.startsWith("-") ? " --validationTypeToExclude " + type.substring(1) : excludeValidationTypesButString(type);
return "-T ValidateVariants -R " + reference + " -L " + region + " --variant:vcf " + privateTestDir + file + typeArgString; return "-T ValidateVariants -R " + reference + " -L " + region + " --variant:vcf " + privateTestDir + file + typeArgString;
} }
private static String excludeValidationTypesButString(String type) {
if (type == "ALL")
return "";
final ValidateVariants.ValidationType vtype = ValidateVariants.ValidationType.valueOf(type);
final StringBuilder sbuilder = new StringBuilder();
for (final ValidateVariants.ValidationType t : ValidateVariants.ValidationType.CONCRETE_TYPES)
if (t != vtype)
sbuilder.append(" --validationTypeToExclude " + t.toString());
return sbuilder.toString();
}
@Test @Test
public void testGoodFile() { public void testGoodFile() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
@ -124,12 +135,11 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
@Test @Test
public void testBadID() { public void testBadID() {
WalkerTestSpec spec = new WalkerTestSpec( final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129, baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129,
0, 0,
UserException.FailsStrictValidation.class UserException.FailsStrictValidation.class
); );
executeTest("test bad RS ID", spec); executeTest("test bad RS ID", spec);
} }
@ -158,7 +168,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
@Test @Test
public void testNoValidation() { public void testNoValidation() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "NONE"), baseTestString("validationExampleBad.vcf", "-ALL"),
0, 0,
Arrays.asList(emptyMd5) Arrays.asList(emptyMd5)
); );
@ -187,7 +197,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
@Test(description = "Checks '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164") @Test(description = "Checks '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164")
public void testUnusedAlleleError() { public void testUnusedAlleleError() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationUnusedAllelesBugFix.vcf","ALL","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class); baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class);
executeTest("test unused allele bug fix", spec); executeTest("test unused allele bug fix", spec);
} }
} }

View File

@ -53,43 +53,37 @@ import java.util.*;
* *
* <p> * <p>
* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it. * ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it.
* In addition to standard adherence to the VCF specification, this tool performs extra checks to make ensure * In addition to standard adherence to the VCF specification, this tool performs extra strict validations to ensure
* the information contained within the file is correct. Checks include: * the information contained within the file is correct. These include:
* </>p> * </p><p>
* <ul> * <dl>
* * <dt>REF</dt><dd>the correctness of the reference base(s).</dd>
* <li>the correctness of the reference base(s), [--validationType REF]</li> * <dt>CHR_COUNTS</dt><dd>accuracy of AC & AN values.</dd>
* <li>accuracy of AC & AN values, [--validationType CHR_COUNTS]</li> * <dt>IDS</dt><dd>tests against rsIDs when a dbSNP file is provided. Notice that for this one to work, you need
* <li>tests against rsIDs when a dbSNP file is provided, [--validationType IDS]</li> * to provide a reference to the dbsnp variant containing file using the <code>--dbsnp</code> as show in examples below.</dd>
* <li>and that all alternate alleles are present in at least one sample. [--validationType ALLELES]</li> * <dt>ALLELES</dt><dd>and that all alternate alleles are present in at least one sample.</dd>
* </ul> * </dl>
* *
* </p> * </p>
* *
* <p> * <p>
* By default it will apply all the strict validations unless you indicate which one you want to perform * By default it will apply all the strict validations unless you indicate which one you want you want to exclude
* using --validationType as indicated above. * using <code>-Xtype|--validationTypeToExclude &lt;<i>code</i>&gt;</code>, where <i>code</i> is one of the listed above. You
* You can request this explicitly using --validationType ALL but this is not necessary. * can exclude as many types as you want
* </p>
*
* <p> * <p>
* If you are looking simply to test the adherence to the VCF specification, use --validationType NONE. * Yo can exclude all strict validations with the special code <code><b>ALL</b></code>. In this case the tool will only
* </p> * test the adherence to the VCF specification.
*
* <p>
* If you want to perform all strict validations but a few you can list the ones to exclude adding
* as many '--validationTypeToExclude TYPE_ID' to the command line as needed.
* This is only possible when the validation type is the default 'ALL'.
* An attempt to combine --validationTypeToExclude with
* any other validation type will result in an error.
* </p> * </p>
* *
* <h3>Input</h3> * <h3>Input</h3>
* <p> * <p>
* A variant set to validate. * A variant set to validate using <code>-V</code> or <code>--variant</code> as shown below.
* </p> * </p>
* *
* <h3>Examples</h3> * <h3>Examples</h3>
*
* <p>To perform VCF format and all strict validations: </p>
*
* <pre> * <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \ * java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \ * -R ref.fasta \
@ -98,6 +92,27 @@ import java.util.*;
* --dbsnp dbsnp.vcf * --dbsnp dbsnp.vcf
* </pre> * </pre>
* *
* <p>To perform only VCF format tests:</p>
*
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T ValidateVariants \
* <b>--validationTypeToExclude ALL</b> \
* --variant input.vcf
* </pre>
*
* <p>To perform all validations except the strict <i>ALLELE</i> validation:</p>
*
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T ValidateVariants \
* <b>--validationTypeToExclude ALLELES</b>
* --variant input.vcf \
* --dbsnp dbsnp.vcf
* </pre>
*
*/ */
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=0,stop=100)) @Reference(window=@Window(start=0,stop=100))
@ -110,8 +125,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public enum ValidationType { public enum ValidationType {
/** /**
* Perform all extra-strict tests listed bellow. * Makes reference to all extra-strict tests listed below.
*/ */
ALL, ALL,
@ -135,18 +151,23 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
* Check that the AN and AC annotations are consistent with the number of calls, alleles and then number these * Check that the AN and AC annotations are consistent with the number of calls, alleles and then number these
* are called across samples. * are called across samples.
*/ */
CHR_COUNTS, CHR_COUNTS;
/** /**
* Do not perform any extra-strict VCF content validation. Notice however that mis-formatted VCF would be reported * Unmodifiable set of concrete validation types.
* as an error (e.g. garbled VCF file, mismatch between format fields and genotype fields number, wrong data types, *
* or reference to non-existing alternative alleles. * <p>These are all types except {@link #ALL}.</p>
*/ */
NONE public final static Set<ValidationType> CONCRETE_TYPES;
}
@Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false) static {
protected ValidationType type = ValidationType.ALL; final Set<ValidationType> cts = new LinkedHashSet<>(values().length - 1);
for (final ValidationType v : values())
if (v != ALL)
cts.add(v);
CONCRETE_TYPES = Collections.unmodifiableSet(cts);
}
}
@Argument(fullName = "validationTypeToExclude", shortName = "Xtype", doc = "which validation type to exclude from a full strict validation", required = false) @Argument(fullName = "validationTypeToExclude", shortName = "Xtype", doc = "which validation type to exclude from a full strict validation", required = false)
protected List<ValidationType> excludeTypes = new ArrayList<>(); protected List<ValidationType> excludeTypes = new ArrayList<>();
@ -164,8 +185,14 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
private File file = null; private File file = null;
/**
* Contains final set of validation to apply.
*/
private Collection<ValidationType> validationTypes;
public void initialize() { public void initialize() {
file = new File(variantCollection.variants.getSource()); file = new File(variantCollection.variants.getSource());
validationTypes = calculateValidationTypesToApply(excludeTypes);
} }
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -215,7 +242,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
} }
try { try {
for (final ValidationType t : calculateValidationTypesToApply(type,excludeTypes)) for (final ValidationType t : validationTypes)
applyValidationType(vc, reportedRefAllele, observedRefAllele, rsIDs, t); applyValidationType(vc, reportedRefAllele, observedRefAllele, rsIDs, t);
} catch (TribbleException e) { } catch (TribbleException e) {
if ( WARN_ON_ERROR ) { if ( WARN_ON_ERROR ) {
@ -229,34 +256,27 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
/** /**
* Given the validation type and exclusion type, calculate the final set of type to validate. * Given the validation type and exclusion type, calculate the final set of type to validate.
* @param type the validation type. * @param excludeTypes types to exclude.
* @param excludeType types to exclude.
* *
* @throws UserException.BadArgumentValue if the user combines any validation type except 'ALL' and some exclude types. * @throws UserException.BadArgumentValue if the user combines any validation type except 'ALL' and some exclude types.
* *
* @return never {@code null} but perhaps an empty set. * @return never {@code null} but perhaps an empty set.
*/ */
private Collection<ValidationType> calculateValidationTypesToApply(final ValidationType type, final List<ValidationType> excludeType) { private Collection<ValidationType> calculateValidationTypesToApply(final List<ValidationType> excludeTypes) {
final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeType); if (excludeTypes.size() == 0)
if (type == ValidationType.ALL) { return Collections.singleton(ValidationType.ALL);
if (excludeTypeSet.size() == 0) final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeTypes);
return Collections.singleton(ValidationType.ALL); if (excludeTypes.size() != excludeTypeSet.size())
else { logger.warn("found repeat redundant validation types listed using the --validationTypeToExclude argument");
if (excludeTypeSet.remove(ValidationType.NONE)) if (excludeTypeSet.contains(ValidationType.ALL)) {
logger.warn("found NONE in the --validationTypeToExclude list; does not have any effect"); if (excludeTypeSet.size() > 1)
if (excludeTypeSet.contains(ValidationType.ALL)) logger.warn("found ALL in the --validationTypeToExclude list together with other concrete type exclusions that are redundant");
logger.warn("found ALL in the --validationTypeToExclude list. Perhaps you want to simply use --validationType NONE instead?"); return Collections.emptyList();
} else {
final Set<ValidationType> result = new LinkedHashSet<>(Arrays.asList(ValidationType.values())); final Set<ValidationType> result = new LinkedHashSet<>(ValidationType.CONCRETE_TYPES);
result.remove(ValidationType.ALL); result.removeAll(excludeTypeSet);
result.remove(ValidationType.NONE); return result;
result.removeAll(excludeTypeSet); }
return result;
}
} else if (excludeTypeSet.size() != 0)
throw new UserException.BadArgumentValue("--validationTypeToExclude","you can use --validationTypeToExclude ONLY in combination with the default --validationType ALL");
else
return Collections.singleton(type);
} }
private void applyValidationType(VariantContext vc, Allele reportedRefAllele, Allele observedRefAllele, Set<String> rsIDs, ValidationType t) { private void applyValidationType(VariantContext vc, Allele reportedRefAllele, Allele observedRefAllele, Set<String> rsIDs, ValidationType t) {