From 9121b9816741fd597321a587f3fa9d18c24e0ed4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 19 Aug 2012 10:29:38 -0400 Subject: [PATCH] CombineVariants outputs the first non-MISSING qual, not the maximum -- When merging multiple VCF records at a site, the combined VCF record has the QUAL of the first VCF record with a non-MISSING QUAL value. The previous behavior was to take the max QUAL, which resulted in sometime strange downstream confusion. --- .../sting/gatk/walkers/variantutils/CombineVariants.java | 7 +++++++ .../sting/utils/variantcontext/VariantContextUtils.java | 6 ++++-- .../variantutils/CombineVariantsIntegrationTest.java | 6 +++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 8dabd49b8..555999bdb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -73,6 +73,13 @@ import java.util.*; * efficiency. However, since this merge runs in only one thread, you can quickly reach diminishing * returns with the number of parallel threads. -nt 4 works well but -nt 8 may be too much. * + * Some fine details about the merging algorithm: + * + * *

Input

*

* One or more variant sets to combine. diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index ff6b0be70..d7e4a7135 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -514,7 +514,7 @@ public class VariantContextUtils { int depth = 0; int maxAC = -1; final Map attributesWithMaxAC = new LinkedHashMap(); - double log10PError = 1; + double log10PError = CommonInfo.NO_LOG10_PERROR; VariantContext vcWithMaxAC = null; GenotypesContext genotypes = GenotypesContext.create(); @@ -542,7 +542,9 @@ public class VariantContextUtils { mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY); - log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1); + // We always take the QUAL of the first VC with a non-MISSING qual for the combined value + if ( log10PError == CommonInfo.NO_LOG10_PERROR ) + log10PError = vc.getLog10PError(); filters.addAll(vc.getFilters()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 9ea751b72..c32d77f82 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -113,13 +113,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5d04f22ef88ed9226cbd7b4483c5cb23"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "629656bfef7713c23f3a593523503b2f"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); } @Test public void uniqueSNPs() { // parallelism must be disabled because the input VCF is malformed (DB=0) and parallelism actually fixes this which breaks the md5s - combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "acc70f33be741b564f7be9aa3f819dd4", true); + combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true); } @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); } @@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019")); + Arrays.asList("e5f0e7a80cd392172ebf5ddb06b91a00")); cvExecuteTest("threeWayWithRefs", spec, true); }