From fa7284b7a11e148790620671c508dc2539998c55 Mon Sep 17 00:00:00 2001 From: carneiro Date: Tue, 8 Mar 2011 22:10:38 +0000 Subject: [PATCH] Genotype And Validate walker is now ready to be used by anyone. given an annotated VCF and a BAM file, it genotypes (using the reads in the BAM) each variant in the VCF (for snp or indel) and validates (or not) the 'known' annotation. Outputs a truth table with the PPV and NPV values, and optionally a vcf file with the variants that had enough coverage to be validated. You can optionally provide a minimum depth of coverage and only do the analysis conditional on that. (will write a wiki for this walker, as it might be useful for future validation essays). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5409 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/GenotypeAndValidateWalker.java | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java index 125166e0d..16afd8912 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java @@ -25,11 +25,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import com.google.common.collect.ImmutableSet; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; @@ -39,8 +44,8 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.vcf.VCFUtils; -import java.io.PrintStream; import java.util.*; /** @@ -61,15 +66,18 @@ import java.util.*; public class GenotypeAndValidateWalker extends RodWalker { + @Output(doc="File to which validated variants should be written", required=true) + protected VCFWriter vcfWriter = null; + @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false) private int mbq = -1; @Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false) private double deletions = -1; + @Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false) + private int minDepth = -1; - @Output( doc="File to write the two-way truth table", required=false) - private PrintStream printStream = null; private String compName = "alleles"; private UnifiedGenotyperEngine snpEngine; @@ -115,6 +123,15 @@ public class GenotypeAndValidateWalker extends RodWalker header = VCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), compName); + Set samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set headerLines = VCFUtils.smartMergeHeaders(header.values(), logger); + headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate")); + vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); + + // Filling in SNP calling arguments for UG UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; @@ -143,7 +160,12 @@ public class GenotypeAndValidateWalker extends RodWalker 0 && context.getBasePileup().getBases().length < minDepth) { + counter.numUncovered = 1L; return counter; } @@ -153,23 +175,22 @@ public class GenotypeAndValidateWalker extends RodWalker