From ea8a1edeb6a5b648b4f66cbdcdd5d3a686438924 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 12 Mar 2015 13:29:52 -0400 Subject: [PATCH] Adding option to CombineGVCFs to have it break blocks at every N sites. Using --breakBandsAtMultiplesOf N will ensure that no reference blocks span across genomic positions that are multiples of N. This is especially important in the case of scatter-gather where you don't want your scatter intervals to start in the middle of blocks (because of a limitation in the way -L works in the GATK for VCF records with the END tag). For example, running with --breakBandsAtMultiplesOf 5 on this record: 1 69491 . G <NON_REF> . . END=69523 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:94:99:82:99:0,120,1800 Will produce the following records: 1 69491 . G <NON_REF> . . END=69494 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:94:99:82:99:0,120,1800 1 69495 . C <NON_REF> . . END=69499 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:94:99:82:99:0,120,1800 1 69500 . T <NON_REF> . . END=69504 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:94:99:82:99:0,120,1800 etc. Added docs and a new test. --- .../walkers/variantutils/CombineGVCFs.java | 28 ++++++++++++++++++- .../CombineGVCFsIntegrationTest.java | 8 ++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java index 674006661..7e7926f3a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java @@ -148,6 +148,16 @@ public class CombineGVCFs extends RodWalker 0 && (loc.getStart()+1) % multipleAtWhichToBreakBands == 0); // add +1 to the loc because we want to break BEFORE this base + } + /** * Is it okay to skip the given position? 
* diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java index d0a979294..3f202dab4 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -203,6 +203,14 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { executeTest("testBasepairResolutionOutput", spec); } + @Test + public void testBreakBlocks() throws Exception { + final String cmd = baseTestString(" -L 1:69485-69791 --breakBandsAtMultiplesOf 5"); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("3a8e53b8b590eaa2675149ceccb80a7a")); + spec.disableShadowBCF(); + executeTest("testBreakBlocks", spec); + } + @Test public void testWrongReferenceBaseBugFix() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"