From c96fee477ca7b7ae0bc7b3249815170b2b035160 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 5 Jan 2012 21:49:02 -0500 Subject: [PATCH] Bug fix for VariantSummary -- Call sets with indels > 50 bp in length are tagged as CNVs in the tag (following the 1000 Genomes convention) and were unconditionally checking whether the CNV is already known, by looking at the known cnvs file, which is optional. Fixed. Has the annoying side effect that indels > 50bp in size are not counted as indels, and so are substrated from both the novel and known counts for indels. C'est la vie -- Added integration test to check for this case, using Mauricio's most recent VCF file for NA12878 which has many large indels. Using this more recent and representative file probably a good idea for more future tests in VE and other tools. File is NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf in Validation_Data --- .../varianteval/evaluators/VariantSummary.java | 17 ++++++++++------- .../varianteval/VariantEvalIntegrationTest.java | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index b3f3630bd..3c7c6f00c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -46,6 +46,7 @@ import java.util.*; public class VariantSummary extends VariantEvaluator implements StandardEval { final protected static Logger logger = Logger.getLogger(VariantSummary.class); + /** Indels with size greater than this value are tallied in the CNV column */ private final static int MAX_INDEL_LENGTH = 50; private final static double MIN_CNV_OVERLAP = 0.5; private VariantEvalWalker walker; @@ -196,14 +197,16 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { } private final boolean overlapsKnownCNV(VariantContext cnv) { - final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true); - IntervalTree intervalTree = knownCNVs.get(loc.getContig()); + if ( knownCNVs != null ) { + final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true); + IntervalTree intervalTree = knownCNVs.get(loc.getContig()); - final Iterator> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop()); - while ( nodeIt.hasNext() ) { - final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue()); - if ( overlapP > MIN_CNV_OVERLAP ) - return true; + final Iterator> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop()); + while ( nodeIt.hasNext() ) { + final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue()); + if ( overlapP > MIN_CNV_OVERLAP ) + return true; + } } return false; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index b3555b145..5c3a43c96 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -450,4 +450,21 @@ public class VariantEvalIntegrationTest extends WalkerTest { ); executeTest("testIntervalStrat", spec); } + + @Test + public void testModernVCFWithLargeIndels() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf", + "-L 20", + "-D " + b37dbSNP132, + "-o %s" + ), + 1, + Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef") + ); + executeTest("testModernVCFWithLargeIndels", spec); + } }