From 4a0458b2e958adf7742b872127bad6694c9ff8bc Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Thu, 11 May 2017 12:41:51 -0400 Subject: [PATCH] GenotypeGVCFs and CombineGVCFs should error when AS annotations are not present (#1585) GenotypeGVCFs and CombineGVCFs error when missing the allele specific annotation group --- .../tools/walkers/variantutils/CombineGVCFs.java | 3 ++- .../tools/walkers/variantutils/GenotypeGVCFs.java | 3 ++- .../ReferenceConfidenceVariantContextMerger.java | 6 +++++- .../variantutils/CombineGVCFsIntegrationTest.java | 14 ++++++++++++++ .../variantutils/GenotypeGVCFsIntegrationTest.java | 10 ++++++++++ 5 files changed, 33 insertions(+), 3 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java index ca2988d81..aa1b00f5f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java @@ -106,10 +106,11 @@ import java.util.*; * -o cohort.g.vcf * * - *

Caveat

+ *

Caveats

*

Only gVCF files produced by HaplotypeCaller (or CombineGVCFs) can be used as input for this tool. Some other * programs produce files that they call gVCFs but those lack some important information (accurate genotype likelihoods * for every position) that GenotypeGVCFs requires for its operation.

+ *

If the gVCF files contain allele-specific annotations, add -G Standard -G AS_Standard to the command line.

* */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java index b298ba25c..da9b8358a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java @@ -122,10 +122,11 @@ import java.util.*; * -o output.vcf * * - *

Caveat

+ *

Caveats

*

Only gVCF files produced by HaplotypeCaller (or CombineGVCFs) can be used as input for this tool. Some other * programs produce files that they call gVCFs but those lack some important information (accurate genotype likelihoods * for every position) that GenotypeGVCFs requires for its operation.

+ *

If the gVCF files contain allele-specific annotations, add -G Standard -G AS_Standard to the command line.

* *

Special note on ploidy

*

This tool is able to handle any ploidy (or mix of ploidies) intelligently; there is no need to specify ploidy diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java index 05c0a930d..7443c8d8d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java @@ -78,6 +78,8 @@ public class ReferenceConfidenceVariantContextMerger { private final static Logger logger = Logger.getLogger(ReferenceConfidenceVariantContextMerger.class); + static final String ADD_AS_STANDARD_MSG = " Add -G Standard -G AS_Standard to the command to annotate in the final VC."; + private static Comparable combineAnnotationValues( final List array ) { return MathUtils.median(array); // right now we take the median but other options could be explored } @@ -244,7 +246,9 @@ public class ReferenceConfidenceVariantContextMerger { } } catch (final NumberFormatException e) { - logger.warn("WARNING: remaining (non-reducible) annotations are assumed to be ints or doubles or booleans, but " + value.getRawData() + " doesn't parse and will not be annotated in the final VC."); + final String baseMsg = "Remaining (non-reducible) annotations are assumed to be ints or doubles, but " + value.getRawData() + " doesn't parse and will not be annotated in the final VC."; + final String msg = value.getRawData().contains("|") ? 
baseMsg + ADD_AS_STANDARD_MSG : baseMsg; + logger.warn(msg); } } parsedAnnotations.put(currentData.getKey(),annotationValues); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java index 286ef6604..6945d82b3 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -51,6 +51,7 @@ package org.broadinstitute.gatk.tools.walkers.variantutils; +import org.apache.commons.io.FileUtils; import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.engine.GATKVCFUtils; import htsjdk.variant.variantcontext.VariantContext; @@ -58,6 +59,7 @@ import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; +import java.io.IOException; import java.util.Arrays; import java.util.List; @@ -282,6 +284,18 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { executeTest("testAlleleSpecificAnnotations", spec); } + @Test + public void testMissingAlleleSpecificAnnotationGroup() throws IOException { + final File logFile = createTempFile("testMissingAlleleSpecificAnnotationGroup.log", ".tmp"); + final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12892.AS.chr20snippet.g.vcf -log " + + logFile.getAbsolutePath(); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("")); + spec.disableShadowBCF(); + executeTest("testMissingAlleleSpecificAnnotationGroup", spec); + 
Assert.assertTrue(FileUtils.readFileToString(logFile).contains(ReferenceConfidenceVariantContextMerger.ADD_AS_STANDARD_MSG)); + } + @Test public void testASMateRankSumAnnotation() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest -V " diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index 2531fb570..3639f236f 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -604,6 +604,16 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { executeTest("testAlleleSpecificAnnotations", spec); } + @Test + public void testMissingAlleleSpecificAnnotationGroup() throws IOException { + final File logFile = createTempFile("testMissingAlleleSpecificAnnotationGroup.log", ".tmp"); + final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header --disableDithering -V " + + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12892.AS.chr20snippet.g.vcf -log " + logFile.getAbsolutePath(); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("")); + spec.disableShadowBCF(); + executeTest("testMissingAlleleSpecificAnnotationGroup", spec); + } + @Test public void testASMateRankSumAnnotation() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V "