Bug fix for VariantSummary

-- Call sets with indels > 50 bp in length are tagged as CNVs in the tag (following the 1000 Genomes convention) and were unconditionally checking whether the CNV is already known, by looking at the known cnvs file, which is optional.  Fixed.  Has the annoying side effect that indels > 50bp in size are not counted as indels, and so are substrated from both the novel and known counts for indels.  C'est la vie
-- Added integration test to check for this case, using Mauricio's most recent VCF file for NA12878 which has many large indels.  Using this more recent and representative file probably a good idea for more future tests in VE and other tools.  File is NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf in Validation_Data
This commit is contained in:
Mark DePristo 2012-01-05 21:49:02 -05:00
parent 18ed954741
commit c96fee477c
2 changed files with 27 additions and 7 deletions

View File

@ -46,6 +46,7 @@ import java.util.*;
public class VariantSummary extends VariantEvaluator implements StandardEval {
final protected static Logger logger = Logger.getLogger(VariantSummary.class);
/** Indels with size greater than this value are tallied in the CNV column */
private final static int MAX_INDEL_LENGTH = 50;
private final static double MIN_CNV_OVERLAP = 0.5;
private VariantEvalWalker walker;
@ -196,14 +197,16 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
}
private final boolean overlapsKnownCNV(VariantContext cnv) {
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
if ( knownCNVs != null ) {
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
while ( nodeIt.hasNext() ) {
final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
if ( overlapP > MIN_CNV_OVERLAP )
return true;
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
while ( nodeIt.hasNext() ) {
final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
if ( overlapP > MIN_CNV_OVERLAP )
return true;
}
}
return false;

View File

@ -450,4 +450,21 @@ public class VariantEvalIntegrationTest extends WalkerTest {
);
executeTest("testIntervalStrat", spec);
}
@Test
public void testModernVCFWithLargeIndels() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
"-L 20",
"-D " + b37dbSNP132,
"-o %s"
),
1,
Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
);
executeTest("testModernVCFWithLargeIndels", spec);
}
}