GenotypeGVCFs and CombineGVCFs should error when AS annotations are not present (#1585)

GenotypeGVCFs and CombineGVCFs now log a warning (suggesting -G Standard -G AS_Standard) when the allele-specific annotation group is missing
This commit is contained in:
Ron Levine 2017-05-11 12:41:51 -04:00 committed by GitHub
parent c855714e10
commit 4a0458b2e9
5 changed files with 33 additions and 3 deletions

View File

@ -106,10 +106,11 @@ import java.util.*;
* -o cohort.g.vcf
* </pre>
*
* <h3>Caveat</h3>
* <h3>Caveats</h3>
* <p>Only gVCF files produced by HaplotypeCaller (or CombineGVCFs) can be used as input for this tool. Some other
* programs produce files that they call gVCFs but those lack some important information (accurate genotype likelihoods
* for every position) that GenotypeGVCFs requires for its operation.</p>
* <p>If the gVCF files contain allele-specific annotations, add <code>-G Standard -G AS_Standard</code> to the command line.</p>
*
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )

View File

@ -122,10 +122,11 @@ import java.util.*;
* -o output.vcf
* </pre>
*
* <h3>Caveat</h3>
* <h3>Caveats</h3>
* <p>Only gVCF files produced by HaplotypeCaller (or CombineGVCFs) can be used as input for this tool. Some other
* programs produce files that they call gVCFs but those lack some important information (accurate genotype likelihoods
* for every position) that GenotypeGVCFs requires for its operation.</p>
* <p>If the gVCF files contain allele-specific annotations, add <code>-G Standard -G AS_Standard</code> to the command line.</p>
*
* <h3>Special note on ploidy</h3>
* <p>This tool is able to handle any ploidy (or mix of ploidies) intelligently; there is no need to specify ploidy

View File

@ -78,6 +78,8 @@ public class ReferenceConfidenceVariantContextMerger {
/** Class-wide log4j-style logger used for the warnings emitted by this merger. */
private final static Logger logger = Logger.getLogger(ReferenceConfidenceVariantContextMerger.class);
/**
 * Hint text telling the user to add the {@code -G Standard -G AS_Standard} annotation groups.
 * It starts with a space so it can be appended directly to another message.
 * Not private, so that tests in the same package can check for it in the log output.
 */
static final String ADD_AS_STANDARD_MSG = " Add -G Standard -G AS_Standard to the command to annotate in the final VC.";
/**
 * Reduces a list of annotation values to a single representative value.
 *
 * @param array the values to combine
 * @return the value selected by {@code MathUtils.median}
 */
private static Comparable combineAnnotationValues( final List<Comparable> array ) {
    // right now we take the median but other options could be explored
    final Comparable combined = MathUtils.median(array);
    return combined;
}
@ -244,7 +246,9 @@ public class ReferenceConfidenceVariantContextMerger {
}
} catch (final NumberFormatException e) {
logger.warn("WARNING: remaining (non-reducible) annotations are assumed to be ints or doubles or booleans, but " + value.getRawData() + " doesn't parse and will not be annotated in the final VC.");
final String baseMsg = "Remaining (non-reducible) annotations are assumed to be ints or doubles, but " + value.getRawData() + " doesn't parse and will not be annotated in the final VC.";
final String msg = value.getRawData().contains("|") ? baseMsg + ADD_AS_STANDARD_MSG : baseMsg;
logger.warn(msg);
}
}
parsedAnnotations.put(currentData.getKey(),annotationValues);

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import htsjdk.variant.variantcontext.VariantContext;
@ -58,6 +59,7 @@ import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
@ -282,6 +284,18 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
executeTest("testAlleleSpecificAnnotations", spec);
}
/**
 * Runs CombineGVCFs on two gVCF snippets without passing any -G annotation group and checks
 * that the walker log contains the hint to add -G Standard -G AS_Standard.
 *
 * @throws IOException if the log file cannot be read back
 */
@Test
public void testMissingAlleleSpecificAnnotationGroup() throws IOException {
    // The walker writes its log here (via -log) so the warning text can be inspected afterwards.
    final File walkerLog = createTempFile("testMissingAlleleSpecificAnnotationGroup.log", ".tmp");

    // Build the command line piece by piece; note that no -G option is supplied.
    final StringBuilder commandLine = new StringBuilder();
    commandLine.append("-T CombineGVCFs -R ").append(b37KGReference);
    commandLine.append(" -o %s --no_cmdline_in_header -V ");
    commandLine.append(privateTestDir).append("NA12878.AS.chr20snippet.g.vcf -V ");
    commandLine.append(privateTestDir).append("NA12892.AS.chr20snippet.g.vcf -log ");
    commandLine.append(walkerLog.getAbsolutePath());

    // One output file, with an empty expected-MD5 entry.
    final WalkerTestSpec testSpec = new WalkerTestSpec(commandLine.toString(), 1, Arrays.asList(""));
    testSpec.disableShadowBCF();
    executeTest("testMissingAlleleSpecificAnnotationGroup", testSpec);

    // The log must carry the hint about the missing annotation groups.
    final String logText = FileUtils.readFileToString(walkerLog);
    Assert.assertTrue(logText.contains(ReferenceConfidenceVariantContextMerger.ADD_AS_STANDARD_MSG));
}
@Test
public void testASMateRankSumAnnotation() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest -V "

View File

@ -604,6 +604,16 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
executeTest("testAlleleSpecificAnnotations", spec);
}
/**
 * Runs GenotypeGVCFs on two gVCF snippets without passing any -G annotation group and checks
 * that the walker log contains the hint to add -G Standard -G AS_Standard.
 *
 * @throws IOException if the log file cannot be read back
 */
@Test
public void testMissingAlleleSpecificAnnotationGroup() throws IOException {
    // The walker writes its log here (via -log) so the warning text can be inspected afterwards.
    final File logFile = createTempFile("testMissingAlleleSpecificAnnotationGroup.log", ".tmp");
    final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header --disableDithering -V "
            + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12892.AS.chr20snippet.g.vcf -log " + logFile.getAbsolutePath();
    final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList(""));
    spec.disableShadowBCF();
    executeTest("testMissingAlleleSpecificAnnotationGroup", spec);

    // Previously the log file was created but never inspected, so the test verified nothing about
    // the warning. Check for the hint, mirroring the equivalent CombineGVCFs test.
    // Fully qualified names are used so this method does not depend on imports that may be absent.
    final String logContents = new String(java.nio.file.Files.readAllBytes(logFile.toPath()),
            java.nio.charset.StandardCharsets.UTF_8);
    org.testng.Assert.assertTrue(logContents.contains(ReferenceConfidenceVariantContextMerger.ADD_AS_STANDARD_MSG),
            "Walker log is missing the hint:" + ReferenceConfidenceVariantContextMerger.ADD_AS_STANDARD_MSG);
}
@Test
public void testASMateRankSumAnnotation() {
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V "