From 4a3c8e68fa866bd4d5878e27f463d940fa20a6ea Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Thu, 6 Feb 2014 17:45:50 -0500 Subject: [PATCH] Fixed out of order non-variant gVCF entries when trimming is active. Story: https://www.pivotaltracker.com/story/show/65319564 --- .../haplotypecaller/ActiveRegionTrimmer.java | 22 +++++++++++-------- .../HaplotypeCallerGVCFIntegrationTest.java | 16 +++++++++++++- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java index e18fa9670..b7a646d4e 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java @@ -489,16 +489,20 @@ class ActiveRegionTrimmer { final GenomeLoc idealSpan = locParser.createPaddedGenomeLoc(variantSpan, padding); final GenomeLoc finalSpan = maximumSpan.intersect(idealSpan).union(variantSpan); - final Pair nonVariantRegions = nonVariantTargetRegions(originalRegion, variantSpan); + // Make double sure that, if we are emitting GVCF we won't call non-variable positions beyond the target active region span. + // In regular call we don't do so so we don't care and we want to maintain behavior, so the conditional. + final GenomeLoc callableSpan = emitReferenceConfidence ? variantSpan.intersect(originalRegionRange) : variantSpan; + + final Pair nonVariantRegions = nonVariantTargetRegions(originalRegion, callableSpan); if ( debug ) { - logger.info("events : " + withinActiveRegion); - logger.info("region : " + originalRegion); - logger.info("variantSpan : " + variantSpan); - logger.info("pad : " + padding); - logger.info("idealSpan : " + idealSpan); - logger.info("maximumSpan : " + maximumSpan); - logger.info("finalSpan : " + finalSpan); + logger.info("events : " + withinActiveRegion); + logger.info("region : " + originalRegion); + logger.info("callableSpan : " + callableSpan); + logger.info("padding : " + padding); + logger.info("idealSpan : " + idealSpan); + logger.info("maximumSpan : " + maximumSpan); + logger.info("finalSpan : " + finalSpan); } return new Result(emitReferenceConfidence,true,originalRegion,padding, usableExtension,withinActiveRegion,nonVariantRegions,finalSpan,idealSpan,maximumSpan,variantSpan); @@ -527,7 +531,7 @@ class ActiveRegionTrimmer { locParser.createGenomeLoc(contig, finalStop + 1, targetStop)) : new Pair<>(locParser.createGenomeLoc(contig, targetStart, finalStart - 1),GenomeLoc.UNMAPPED); } else if (postTrimmingRequired) - return new Pair<>(locParser.createGenomeLoc(targetRegionRange.getContig(), finalStop + 1, targetStop),GenomeLoc.UNMAPPED); + return new Pair<>(GenomeLoc.UNMAPPED,locParser.createGenomeLoc(targetRegionRange.getContig(), finalStop + 1, targetStop)); else return new Pair<>(GenomeLoc.UNMAPPED,GenomeLoc.UNMAPPED); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index b533d391f..88589f216 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -47,11 +47,13 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variant.GATKVCFIndexType; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -84,7 +86,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); final String name = "testHCWithGVCF bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); - executeTest(name, spec); + final Pair,List> executionOutput = executeTest(name, spec); } @Test @@ -127,4 +129,16 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); } + + private final static String WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS = privateTestDir + "gvcf_unsorted_records_bug.interval_list"; + private final static String WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM = privateTestDir + "gvcf_unsorted_records_bug.bam"; + + @Test() + public void testWrongGVCFNonVariantRecordOrderBugFix() { + final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", + b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("324eb46738a364cd7dc5fa0b62491a5e")); + spec.disableShadowBCF(); + executeTest("testMissingGVCFIndexingStrategyException", spec); + } }