Change in the command line interface of ValidateVariants.
Following reviewers comments the command line interface has been simplified.
All extra strict validations are performed by default (as before) and the
user has to indicate which one he/she does not want to use with --validationTypeToExclude.
Before he/she was able to indicate the only ones to apply with --validationType but that has been scrapped out.
Stories:
- https://www.pivotaltracker.com/story/show/68725164
Changes:
- Removed validateType argument.
- Improved documentation.
- Added some warnning log message on suspicious argument combinations.
Tests:
- ValidateVariantsIntegrationTest#*
This commit is contained in:
parent
18deeec6b0
commit
5afcc8e05f
|
|
@ -63,10 +63,21 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
public static String baseTestString(String file, String type, String region, String reference) {
|
||||
final String typeArgString = type.startsWith("-") ? " --validationTypeToExclude " + type.substring(1) : " --validationType " + type;
|
||||
final String typeArgString = type.startsWith("-") ? " --validationTypeToExclude " + type.substring(1) : excludeValidationTypesButString(type);
|
||||
return "-T ValidateVariants -R " + reference + " -L " + region + " --variant:vcf " + privateTestDir + file + typeArgString;
|
||||
}
|
||||
|
||||
private static String excludeValidationTypesButString(String type) {
|
||||
if (type == "ALL")
|
||||
return "";
|
||||
final ValidateVariants.ValidationType vtype = ValidateVariants.ValidationType.valueOf(type);
|
||||
final StringBuilder sbuilder = new StringBuilder();
|
||||
for (final ValidateVariants.ValidationType t : ValidateVariants.ValidationType.CONCRETE_TYPES)
|
||||
if (t != vtype)
|
||||
sbuilder.append(" --validationTypeToExclude " + t.toString());
|
||||
return sbuilder.toString();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGoodFile() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -124,12 +135,11 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testBadID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129,
|
||||
0,
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad RS ID", spec);
|
||||
}
|
||||
|
||||
|
|
@ -158,7 +168,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testNoValidation() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "NONE"),
|
||||
baseTestString("validationExampleBad.vcf", "-ALL"),
|
||||
0,
|
||||
Arrays.asList(emptyMd5)
|
||||
);
|
||||
|
|
@ -187,7 +197,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
@Test(description = "Checks '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164")
|
||||
public void testUnusedAlleleError() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationUnusedAllelesBugFix.vcf","ALL","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class);
|
||||
baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class);
|
||||
executeTest("test unused allele bug fix", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,43 +53,37 @@ import java.util.*;
|
|||
*
|
||||
* <p>
|
||||
* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it.
|
||||
* In addition to standard adherence to the VCF specification, this tool performs extra checks to make ensure
|
||||
* the information contained within the file is correct. Checks include:
|
||||
* </>p>
|
||||
* <ul>
|
||||
*
|
||||
* <li>the correctness of the reference base(s), [--validationType REF]</li>
|
||||
* <li>accuracy of AC & AN values, [--validationType CHR_COUNTS]</li>
|
||||
* <li>tests against rsIDs when a dbSNP file is provided, [--validationType IDS]</li>
|
||||
* <li>and that all alternate alleles are present in at least one sample. [--validationType ALLELES]</li>
|
||||
* </ul>
|
||||
* In addition to standard adherence to the VCF specification, this tool performs extra strict validations to ensure
|
||||
* the information contained within the file is correct. These include:
|
||||
* </p><p>
|
||||
* <dl>
|
||||
* <dt>REF</dt><dd>the correctness of the reference base(s).</dd>
|
||||
* <dt>CHR_COUNTS</dt><dd>accuracy of AC & AN values.</dd>
|
||||
* <dt>IDS</dt><dd>tests against rsIDs when a dbSNP file is provided. Notice that for this one to work, you need
|
||||
* to provide a reference to the dbsnp variant containing file using the <code>--dbsnp</code> as show in examples below.</dd>
|
||||
* <dt>ALLELES</dt><dd>and that all alternate alleles are present in at least one sample.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* By default it will apply all the strict validations unless you indicate which one you want to perform
|
||||
* using --validationType as indicated above.
|
||||
* You can request this explicitly using --validationType ALL but this is not necessary.
|
||||
* </p>
|
||||
*
|
||||
* By default it will apply all the strict validations unless you indicate which one you want you want to exclude
|
||||
* using <code>-Xtype|--validationTypeToExclude <<i>code</i>></code>, where <i>code</i> is one of the listed above. You
|
||||
* can exclude as many types as you want
|
||||
* <p>
|
||||
* If you are looking simply to test the adherence to the VCF specification, use --validationType NONE.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* If you want to perform all strict validations but a few you can list the ones to exclude adding
|
||||
* as many '--validationTypeToExclude TYPE_ID' to the command line as needed.
|
||||
* This is only possible when the validation type is the default 'ALL'.
|
||||
* An attempt to combine --validationTypeToExclude with
|
||||
* any other validation type will result in an error.
|
||||
* Yo can exclude all strict validations with the special code <code><b>ALL</b></code>. In this case the tool will only
|
||||
* test the adherence to the VCF specification.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Input</h3>
|
||||
* <p>
|
||||
* A variant set to validate.
|
||||
* A variant set to validate using <code>-V</code> or <code>--variant</code> as shown below.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Examples</h3>
|
||||
*
|
||||
* <p>To perform VCF format and all strict validations: </p>
|
||||
*
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
|
|
@ -98,6 +92,27 @@ import java.util.*;
|
|||
* --dbsnp dbsnp.vcf
|
||||
* </pre>
|
||||
*
|
||||
* <p>To perform only VCF format tests:</p>
|
||||
*
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ValidateVariants \
|
||||
* <b>--validationTypeToExclude ALL</b> \
|
||||
* --variant input.vcf
|
||||
* </pre>
|
||||
*
|
||||
* <p>To perform all validations except the strict <i>ALLELE</i> validation:</p>
|
||||
*
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ValidateVariants \
|
||||
* <b>--validationTypeToExclude ALLELES</b>
|
||||
* --variant input.vcf \
|
||||
* --dbsnp dbsnp.vcf
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} )
|
||||
@Reference(window=@Window(start=0,stop=100))
|
||||
|
|
@ -110,8 +125,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||
|
||||
public enum ValidationType {
|
||||
|
||||
/**
|
||||
* Perform all extra-strict tests listed bellow.
|
||||
* Makes reference to all extra-strict tests listed below.
|
||||
*/
|
||||
ALL,
|
||||
|
||||
|
|
@ -135,18 +151,23 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
* Check that the AN and AC annotations are consistent with the number of calls, alleles and then number these
|
||||
* are called across samples.
|
||||
*/
|
||||
CHR_COUNTS,
|
||||
CHR_COUNTS;
|
||||
|
||||
/**
|
||||
* Do not perform any extra-strict VCF content validation. Notice however that mis-formatted VCF would be reported
|
||||
* as an error (e.g. garbled VCF file, mismatch between format fields and genotype fields number, wrong data types,
|
||||
* or reference to non-existing alternative alleles.
|
||||
* Unmodifiable set of concrete validation types.
|
||||
*
|
||||
* <p>These are all types except {@link #ALL}.</p>
|
||||
*/
|
||||
NONE
|
||||
}
|
||||
public final static Set<ValidationType> CONCRETE_TYPES;
|
||||
|
||||
@Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false)
|
||||
protected ValidationType type = ValidationType.ALL;
|
||||
static {
|
||||
final Set<ValidationType> cts = new LinkedHashSet<>(values().length - 1);
|
||||
for (final ValidationType v : values())
|
||||
if (v != ALL)
|
||||
cts.add(v);
|
||||
CONCRETE_TYPES = Collections.unmodifiableSet(cts);
|
||||
}
|
||||
}
|
||||
|
||||
@Argument(fullName = "validationTypeToExclude", shortName = "Xtype", doc = "which validation type to exclude from a full strict validation", required = false)
|
||||
protected List<ValidationType> excludeTypes = new ArrayList<>();
|
||||
|
|
@ -164,8 +185,14 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
private File file = null;
|
||||
|
||||
/**
|
||||
* Contains final set of validation to apply.
|
||||
*/
|
||||
private Collection<ValidationType> validationTypes;
|
||||
|
||||
public void initialize() {
|
||||
file = new File(variantCollection.variants.getSource());
|
||||
validationTypes = calculateValidationTypesToApply(excludeTypes);
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
@ -215,7 +242,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
try {
|
||||
for (final ValidationType t : calculateValidationTypesToApply(type,excludeTypes))
|
||||
for (final ValidationType t : validationTypes)
|
||||
applyValidationType(vc, reportedRefAllele, observedRefAllele, rsIDs, t);
|
||||
} catch (TribbleException e) {
|
||||
if ( WARN_ON_ERROR ) {
|
||||
|
|
@ -229,34 +256,27 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
/**
|
||||
* Given the validation type and exclusion type, calculate the final set of type to validate.
|
||||
* @param type the validation type.
|
||||
* @param excludeType types to exclude.
|
||||
* @param excludeTypes types to exclude.
|
||||
*
|
||||
* @throws UserException.BadArgumentValue if the user combines any validation type except 'ALL' and some exclude types.
|
||||
*
|
||||
* @return never {@code null} but perhaps an empty set.
|
||||
*/
|
||||
private Collection<ValidationType> calculateValidationTypesToApply(final ValidationType type, final List<ValidationType> excludeType) {
|
||||
final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeType);
|
||||
if (type == ValidationType.ALL) {
|
||||
if (excludeTypeSet.size() == 0)
|
||||
return Collections.singleton(ValidationType.ALL);
|
||||
else {
|
||||
if (excludeTypeSet.remove(ValidationType.NONE))
|
||||
logger.warn("found NONE in the --validationTypeToExclude list; does not have any effect");
|
||||
if (excludeTypeSet.contains(ValidationType.ALL))
|
||||
logger.warn("found ALL in the --validationTypeToExclude list. Perhaps you want to simply use --validationType NONE instead?");
|
||||
|
||||
final Set<ValidationType> result = new LinkedHashSet<>(Arrays.asList(ValidationType.values()));
|
||||
result.remove(ValidationType.ALL);
|
||||
result.remove(ValidationType.NONE);
|
||||
result.removeAll(excludeTypeSet);
|
||||
return result;
|
||||
}
|
||||
} else if (excludeTypeSet.size() != 0)
|
||||
throw new UserException.BadArgumentValue("--validationTypeToExclude","you can use --validationTypeToExclude ONLY in combination with the default --validationType ALL");
|
||||
else
|
||||
return Collections.singleton(type);
|
||||
private Collection<ValidationType> calculateValidationTypesToApply(final List<ValidationType> excludeTypes) {
|
||||
if (excludeTypes.size() == 0)
|
||||
return Collections.singleton(ValidationType.ALL);
|
||||
final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeTypes);
|
||||
if (excludeTypes.size() != excludeTypeSet.size())
|
||||
logger.warn("found repeat redundant validation types listed using the --validationTypeToExclude argument");
|
||||
if (excludeTypeSet.contains(ValidationType.ALL)) {
|
||||
if (excludeTypeSet.size() > 1)
|
||||
logger.warn("found ALL in the --validationTypeToExclude list together with other concrete type exclusions that are redundant");
|
||||
return Collections.emptyList();
|
||||
} else {
|
||||
final Set<ValidationType> result = new LinkedHashSet<>(ValidationType.CONCRETE_TYPES);
|
||||
result.removeAll(excludeTypeSet);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private void applyValidationType(VariantContext vc, Allele reportedRefAllele, Allele observedRefAllele, Set<String> rsIDs, ValidationType t) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue