From 3b1c33740185150acf2da4781c6a0937ebd81a9a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Mar 2014 19:11:40 -0400 Subject: [PATCH 1/3] Have CombineVariants throw a UserError when trying to combine GVCFs from the HaplotypeCaller. Was previously throwing an IllegalArgumentException (in the wrong place in the code). Error message tells users to use CombineGVCFs. --- .../CombineVariantsIntegrationTest.java | 13 +++++++++++++ .../gatk/walkers/variantutils/CombineVariants.java | 3 +++ .../utils/variant/GATKVariantContextUtils.java | 4 ---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index fb54ab400..91952ec43 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -204,4 +204,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest { Arrays.asList("f8c014d0af7e014475a2a448dc1f9cef")); cvExecuteTest("combineLeavesUnfilteredRecordsUnfiltered: ", spec, false); } + + @Test + public void combiningGVCFsFails() { + try { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants --no_cmdline_in_header -o %s " + + " -R " + b37KGReference + + " -V " + privateTestDir + "gvcfExample1.vcf", + 1, + Arrays.asList("FAILFAILFAILFAILFAILFAILFAILFAIL")); + executeTest("combiningGVCFsFails", spec); + } catch (Exception e) { } // do nothing + } } \ No newline at end of file diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 152128022..c3e773dcd 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -326,6 +326,9 @@ public class CombineVariants extends RodWalker implements Tree if ( mergedVC == null ) continue; + if ( mergedVC.hasAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) ) + throw new UserException("CombineVariants should not be used to merge gVCFs produced by the HaplotypeCaller; use CombineGVCFs instead"); + final VariantContextBuilder builder = new VariantContextBuilder(mergedVC); // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(builder, false); diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index fb5564ab3..5670f5871 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -1921,10 +1921,6 @@ public class GATKVariantContextUtils { final HashMap> mappedVCs = new HashMap<>(); for ( final VariantContext vc : VCs ) { VariantContext.Type vcType = vc.getType(); - if( vc.hasAllele(NON_REF_SYMBOLIC_ALLELE) ) { - if( vc.getAlternateAlleles().size() > 1 ) { throw new IllegalStateException("Reference records should not have more than one alternate allele"); } - vcType = VariantContext.Type.NO_VARIATION; - } // look at previous variant contexts of different type. If: // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list From 824983af1d1cbe9bb943f835c9b2934dac9666e4 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Mar 2014 19:23:05 -0400 Subject: [PATCH 2/3] Enable CombineGVCFs to process gVCFs that were created with basepair resolution. --- .../gatk/walkers/variantutils/CombineGVCFs.java | 2 +- .../variantutils/CombineGVCFsIntegrationTest.java | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java index 580380513..788dbf0f6 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java @@ -292,7 +292,7 @@ public class CombineGVCFs extends RodWalker attrs = new HashMap<>(1); - if ( !USE_BP_RESOLUTION ) + if ( !USE_BP_RESOLUTION && end != start ) attrs.put(VCFConstants.END_KEY, Integer.toString(end)); // genotypes diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java index 1a4d0c1f4..3051571eb 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -161,16 +161,24 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { @Test public void testMD5s() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("aecdfa9eb32b802cd629e9f811ef15fd")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c279e089fd15359e75867b1318cb4d50")); spec.disableShadowBCF(); executeTest("testMD5s", spec); } @Test - public void testBasepairResolution() throws Exception { + public void testBasepairResolutionOutput() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution"); final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4")); spec.disableShadowBCF(); - executeTest("testBasepairResolution", spec); + executeTest("testBasepairResolutionOutput", spec); + } + + @Test + public void testBasepairResolutionInput() throws Exception { + final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf"; + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("0bd914cfa16349ee0439bfa5033a4f2a")); + spec.disableShadowBCF(); + executeTest("testBasepairResolutionInput", spec); } } From 7c8ce3cd6a88eeb55aedc22ef6114060a473df75 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 20 Mar 2014 00:01:50 -0400 Subject: [PATCH 3/3] Several improvements to GenotypeGVCFs: --includeNonVariantSites now actually works and we propagate AD to hom ref samples --- .../walkers/variantutils/GenotypeGVCFs.java | 56 ++++++++++++++----- .../GenotypeGVCFsIntegrationTest.java | 12 ++-- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java index a6d151df8..09c291955 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java @@ -121,8 +121,7 @@ public class GenotypeGVCFs extends RodWalker cleanupGenotypeAnnotations(final GenotypesContext newGs) { - final List recoveredGs = new ArrayList<>(newGs.size()); - for ( final Genotype newG : newGs ) { - final Map attrs = new HashMap<>(newG.getExtendedAttributes()); + private List cleanupGenotypeAnnotations(final VariantContext VC, final boolean createRefGTs) { + final GenotypesContext oldGTs = VC.getGenotypes(); + final List recoveredGs = new ArrayList<>(oldGTs.size()); + for ( final Genotype oldGT : oldGTs ) { + final Map attrs = new HashMap<>(oldGT.getExtendedAttributes()); - final GenotypeBuilder builder = new GenotypeBuilder(newG); + final GenotypeBuilder builder = new GenotypeBuilder(oldGT); + int depth = oldGT.hasDP() ? oldGT.getDP() : 0; // move the MIN_DP to DP - if ( newG.hasExtendedAttribute("MIN_DP") ) { - builder.DP(newG.getAttributeAsInt("MIN_DP", 0)); + if ( oldGT.hasExtendedAttribute("MIN_DP") ) { + depth = oldGT.getAttributeAsInt("MIN_DP", 0); + builder.DP(depth); attrs.remove("MIN_DP"); } // remove SB attrs.remove("SB"); + // create AD if it's not there + if ( !oldGT.hasAD() && VC.isVariant() ) { + final int[] AD = new int[VC.getNAlleles()]; + AD[0] = depth; + builder.AD(AD); + } + + if ( createRefGTs ) { + final int ploidy = oldGT.getPloidy(); + final List refAlleles = new ArrayList<>(ploidy); + for ( int i = 0; i < ploidy; i++ ) + refAlleles.add(VC.getReference()); + builder.alleles(refAlleles); + + // also, the PLs are technically no longer usable + builder.noPL(); + } + recoveredGs.add(builder.noAttributes().attributes(attrs).make()); } return recoveredGs; diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index 1ca23caba..1e2b30fee 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -65,19 +65,19 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-20,000,000", b37KGReference), 1, - Arrays.asList("2be5f6f7e7f79841108906555d548683")); + Arrays.asList("af58920a965229526aa6961f0d30b035")); executeTest("combineSingleSamplePipelineGVCF", spec); } - @Test(enabled = false) // TODO -- reenable when this option works + @Test(enabled = true) public void combineSingleSamplePipelineGVCF_includeNonVariants() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" + " -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" + " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + - " -inv -L 20:10,000,000-10,010,000", b37KGReference), + " --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference), 1, - Arrays.asList("de957075796512cb9f333f77515e97d5")); + Arrays.asList("764c8d62b4128496471f49cda029b5da")); executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec); } @@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference), 1, - Arrays.asList("e3c7452277898fece54bf60af9588666")); + Arrays.asList("67120b6784e91e737c37a1d0b3d53bc4")); executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec); } @@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V " + privateTestDir + "gvcfExample1.vcf" + " -V " + privateTestDir + "gvcfExample2.vcf", 1, - Arrays.asList("67410d8ac490e3c9d19ba7a4cceaf8fb")); + Arrays.asList("24401bb1011bf6cd452e428c6ad5852d")); executeTest("testSamplesWithDifferentLs", spec); } } \ No newline at end of file