Genotype And Validate walker is now ready to be used by anyone.
given an annotated VCF and a BAM file, it genotypes (using the reads in the BAM) each variant in the VCF (for snp or indel) and validates (or not) the 'known' annotation. Outputs a truth table with the PPV and NPV values, and optionally a vcf file with the variants that had enough coverage to be validated. You can optionally provide a minimum depth of coverage and only do the analysis conditional on that. (will write a wiki for this walker, as it might be useful for future validation essays). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5409 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
da88c29b6e
commit
fa7284b7a1
|
|
@ -25,11 +25,16 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -39,8 +44,8 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -61,15 +66,18 @@ import java.util.*;
|
|||
|
||||
public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalker.CountedData, GenotypeAndValidateWalker.CountedData> {
|
||||
|
||||
@Output(doc="File to which validated variants should be written", required=true)
|
||||
protected VCFWriter vcfWriter = null;
|
||||
|
||||
@Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
|
||||
private int mbq = -1;
|
||||
|
||||
@Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false)
|
||||
private double deletions = -1;
|
||||
|
||||
@Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false)
|
||||
private int minDepth = -1;
|
||||
|
||||
@Output( doc="File to write the two-way truth table", required=false)
|
||||
private PrintStream printStream = null;
|
||||
|
||||
private String compName = "alleles";
|
||||
private UnifiedGenotyperEngine snpEngine;
|
||||
|
|
@ -115,6 +123,15 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
if ( !rodList.get(0).getName().equals(compName))
|
||||
throw new UserException.BadInput("The ROD track has to be named \""+ compName +"\". Not " + rodList.get(0).getName());
|
||||
|
||||
|
||||
// Initialize VCF header
|
||||
Map<String, VCFHeader> header = VCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), compName);
|
||||
Set<String> samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), logger);
|
||||
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
|
||||
|
||||
// Filling in SNP calling arguments for UG
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||
|
|
@ -143,7 +160,12 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
return counter;
|
||||
|
||||
VariantContext vcComp = tracker.getVariantContext(ref, compName, null, context.getLocation(), false);
|
||||
if( vcComp == null ){
|
||||
if( vcComp == null )
|
||||
return counter;
|
||||
|
||||
// Do not operate on variants that are not covered to the optional minimum depth
|
||||
if (!context.hasReads() || minDepth > 0 && context.getBasePileup().getBases().length < minDepth) {
|
||||
counter.numUncovered = 1L;
|
||||
return counter;
|
||||
}
|
||||
|
||||
|
|
@ -153,23 +175,22 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
VariantCallContext call;
|
||||
if ( vcComp.isSNP() )
|
||||
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
||||
else if ( vcComp.isIndel() )
|
||||
else if ( vcComp.isIndel() ) {
|
||||
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
||||
if (call.vc == null) // variant context will be null on an extended indel event and I just want to call it one event.
|
||||
return counter;
|
||||
}
|
||||
else {
|
||||
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
|
||||
return counter;
|
||||
}
|
||||
|
||||
if (!call.confidentlyCalled) {
|
||||
if (context.hasReads()) {
|
||||
counter.numNotConfidentCalls = 1L;
|
||||
if (vcComp.getFilters().contains("TP"))
|
||||
counter.numFN = 1L;
|
||||
else
|
||||
counter.numTN = 1L;
|
||||
}
|
||||
counter.numNotConfidentCalls = 1L;
|
||||
if (vcComp.getFilters().contains("TP"))
|
||||
counter.numFN = 1L;
|
||||
else
|
||||
counter.numUncovered = 1L;
|
||||
counter.numTN = 1L;
|
||||
}
|
||||
else {
|
||||
counter.numConfidentCalls = 1L;
|
||||
|
|
@ -178,7 +199,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
else
|
||||
counter.numFP = 1L;
|
||||
}
|
||||
|
||||
vcfWriter.add(vcComp, ref.getBase());
|
||||
return counter;
|
||||
}
|
||||
|
||||
|
|
@ -202,6 +223,8 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
logger.info("TN = " + reduceSum.numTN);
|
||||
logger.info("FP = " + reduceSum.numFP);
|
||||
logger.info("FN = " + reduceSum.numFN);
|
||||
logger.info("PPV = " + ((double) reduceSum.numTP /( reduceSum.numTP + reduceSum.numFP)));
|
||||
logger.info("FPV = " + ((double) reduceSum.numTN /( reduceSum.numTN + reduceSum.numFN)));
|
||||
logger.info("Uncovered = " + reduceSum.numUncovered);
|
||||
logger.info("confidently called = " + reduceSum.numConfidentCalls);
|
||||
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
|
||||
|
|
|
|||
Loading…
Reference in New Issue