Add support for snpEff "GATK compatibility mode" (-o gatk)

-Do not throw an exception when parsing snpEff output files
 generated by not-officially-supported versions of snpEff,
 PROVIDED that snpEff was run with -o gatk

-Requested by the snpEff author

-Relevant integration tests updated/expanded
This commit is contained in:
David Roazen 2013-04-26 13:42:01 -04:00
parent b749f06ba6
commit 7d90bbab08
2 changed files with 48 additions and 24 deletions

View File

@ -226,15 +226,29 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
}
/**
 * Integration test for a SnpEff input file produced by a not-officially-supported snpEff version
 * that was run in GATK compatibility mode: VariantAnnotator is run with the SnpEff annotation over
 * the interval 1:10001292-10012424 and is expected to produce one output file matching the listed
 * checksum (presumably an MD5 -- confirm against WalkerTestSpec).
 *
 * NOTE(review): this hunk shows the previous method name and previous command-line strings
 * (hg19Reference / validationDataLocation) alongside their replacements
 * (b37KGReference / privateTestDir); presumably only the latter are the current code.
 */
@Test
public void testSnpEffAnnotationsUnsupportedVersion() {
public void testSnpEffAnnotationsUnsupportedVersionGATKMode() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " --no_cmdline_in_header -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.unsupported.version.vcf -L 1:1-1,500,000",
"-T VariantAnnotator -R " + b37KGReference + " --no_cmdline_in_header -o %s -A SnpEff " +
"--variant " + privateTestDir + "vcf4.1.example.vcf " +
"--snpEffFile " + privateTestDir + "snpEff_unsupported_version_gatk_mode.vcf " +
"-L 1:10001292-10012424",
1,
Arrays.asList("7352cf23a4d45d3d2bb34ab44a4100ae")
);
executeTest("Testing SnpEff annotations (unsupported version, GATK mode)", spec);
}
/**
 * Integration test for a SnpEff input file produced by a not-officially-supported snpEff version
 * that was NOT run in GATK compatibility mode: the same VariantAnnotator invocation is expected
 * to fail with a UserException instead of producing output.
 *
 * NOTE(review): two executeTest calls appear below because this hunk shows the previous test
 * description next to its replacement; presumably only the "no GATK mode" call is current code.
 */
@Test
public void testSnpEffAnnotationsUnsupportedVersionNoGATKMode() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " --no_cmdline_in_header -o %s -A SnpEff " +
"--variant " + privateTestDir + "vcf4.1.example.vcf " +
"--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " +
"-L 1:10001292-10012424",
1,
UserException.class
);
executeTest("Testing SnpEff annotations (unsupported version)", spec);
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
}
@Test

View File

@ -42,6 +42,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.*;
import java.util.regex.Pattern;
/**
* A set of genomic annotations based on the output of the SnpEff variant effect predictor tool
@ -63,6 +64,8 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// SnpEff versions accepted unconditionally; the version parsed from the input file's header
// is compared against these entries for exact string equality.
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" };
// Keys of the header lines in the SnpEff input VCF that hold the SnpEff version and the
// SnpEff command line, respectively.
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
// Human-readable form of the snpEff "GATK compatibility mode" option, used when building
// user-facing messages.
public static final String SNPEFF_GATK_COMPATIBILITY_ARGUMENT = "-o gatk";
// Detects the compatibility option inside a recorded command line: "-o", then one or more
// whitespace characters, then "gatk".
public static final Pattern SNPEFF_GATK_COMPATIBILITY_ARGUMENT_PATTERN = Pattern.compile("-o\\s+gatk");
// When we write the SnpEff version number and command line to the output VCF, we change
// the key name slightly so that the output VCF won't be confused in the future for an
@ -219,8 +222,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
checkSnpEffVersion(snpEffVersionLine);
checkSnpEffCommandLine(snpEffCommandLine);
checkSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine);
// If everything looks ok, add the SnpEff version number and command-line header lines to the
// header of the VCF output file, changing the key names so that our output file won't be
@ -267,37 +269,45 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
}
}
/**
 * Validates the SnpEff version and command-line header lines of the SnpEff input file.
 *
 * Fails if the version line is missing or blank, if the command line is missing or blank, or if
 * isSupportedSnpEffVersion() rejects the combination of parsed version and command line.
 *
 * NOTE(review): this hunk shows the previous signature (checkSnpEffVersion), the previous
 * concatenation-style messages and the previous single-argument support check alongside their
 * replacements; presumably only the String.format-based lines are the current code.
 *
 * @param snpEffVersionLine header line holding the SnpEff version; may be null
 * @param snpEffCommandLine header line holding the SnpEff command line; may be null
 * @throws UserException if either header line is absent/blank or the version is not accepted
 */
private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
private void checkSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ) {
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"and so could not verify that the file was generated by a supported version of SnpEff (%s)",
SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString()));
}
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
"which should be added by all supported versions of SnpEff (%s)",
SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, supportedSnpEffVersionsString()));
}
// Strip double quotes from the header value and keep only the text before the first space
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
"is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
if ( ! isSupportedSnpEffVersion(snpEffVersionString, snpEffCommandLine.getValue()) ) {
throw new UserException(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
"is not currently supported by the GATK, and was not run in GATK " +
"compatibility mode. Supported versions are: %s",
snpEffVersionString, supportedSnpEffVersionsString()));
}
}
/**
 * Verifies that the SnpEff input file's header carries a non-blank SnpEff command line.
 *
 * @param snpEffCommandLine header line holding the SnpEff command line; may be null
 * @throws UserException if the line is null, has a null value, or has a value that is blank after trimming
 */
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
    final String commandLineValue = ( snpEffCommandLine == null ) ? null : snpEffCommandLine.getValue();

    // Nothing to report when a non-blank command line is present
    if ( commandLineValue != null && commandLineValue.trim().length() > 0 ) {
        return;
    }

    final String message = "Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY +
            " entry in the VCF header for the SnpEff input file, which should be added by all supported versions of SnpEff (" +
            Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")";
    throw new UserException(message);
}
/**
 * Decides whether a SnpEff input file is acceptable, given the version it reports and the
 * command line it was generated with.
 *
 * NOTE(review): this hunk shows the previous one-argument signature and the previous
 * unconditional "return false" alongside their replacements; presumably only the two-argument
 * signature and the pattern-based return are the current code.
 *
 * @param versionString SnpEff version parsed from the input file's header
 * @param commandLine SnpEff command line recorded in the input file's header
 * @return true if versionString equals an entry of SUPPORTED_SNPEFF_VERSIONS, or otherwise if
 *         commandLine contains a match for SNPEFF_GATK_COMPATIBILITY_ARGUMENT_PATTERN
 */
private boolean isSupportedSnpEffVersion ( String versionString ) {
private boolean isSupportedSnpEffVersion( final String versionString, final String commandLine ) {
// first check to see if it's an officially-supported version
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
if ( supportedVersion.equals(versionString) ) {
return true;
}
}
return false;
// if it's not an officially-supported version, check to see whether the
// "-o gatk" compatibility option was specified
return SNPEFF_GATK_COMPATIBILITY_ARGUMENT_PATTERN.matcher(commandLine).find();
}
/**
 * Builds the description of acceptable SnpEff versions that is embedded in error messages.
 *
 * @return the officially supported versions, followed by a note that later versions are
 *         accepted when run with the GATK compatibility option
 */
private String supportedSnpEffVersionsString() {
    final String officialVersions = Arrays.toString(SUPPORTED_SNPEFF_VERSIONS);
    return officialVersions
            + ", as well as later versions when run with the option "
            + SNPEFF_GATK_COMPATIBILITY_ARGUMENT;
}
private VariantContext getMatchingSnpEffRecord ( List<VariantContext> snpEffRecords, VariantContext vc ) {