Added ValidationData analysis type to VariantEvalWalker; this eval takes a GFF file with validated truth data positions (bound to "validation")and calculates the accuracy of the genotype calls bound to "eval".
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1862 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
07b134a124
commit
d1a4cd2f73
|
|
@ -444,7 +444,7 @@ public class GenomeAnalysisEngine {
|
||||||
// Check to see that no forbidden rods are present.
|
// Check to see that no forbidden rods are present.
|
||||||
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods) {
|
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods) {
|
||||||
if (!WalkerManager.isAllowed(walker, rod))
|
if (!WalkerManager.isAllowed(walker, rod))
|
||||||
throw new ArgumentException(String.format("Walker does not allow access to metadata: %s. If this is incorrect, change the @Allows metadata", rod.getName()));
|
throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s. If this is incorrect, change the @Allows metadata", walker.getClass(), rod.getName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: andrewk
|
||||||
|
* Date: Sep 30, 2009
|
||||||
|
* Time: 5:28:57 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// @Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=VariationRod.class), @RMD(name="dbsnp",type= rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class), @RMD(name="validation",type=RodGenotypeChipAsGFF.class)})
|
||||||
|
|
||||||
|
public class ValidationDataAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||||
|
private int calls_at_validated_sites = 0;
|
||||||
|
private int calls_at_sites_validated_true = 0;
|
||||||
|
private int validated_sites = 0;
|
||||||
|
|
||||||
|
public ValidationDataAnalysis() {
|
||||||
|
super("validation_data_analysis");
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
|
|
||||||
|
validated_sites++;
|
||||||
|
Variation val_data = (Variation) tracker.lookup("validation", null);
|
||||||
|
Variation dbsnp = (Variation) tracker.lookup("dbsnp",null);
|
||||||
|
|
||||||
|
if (eval != null) {
|
||||||
|
calls_at_sites_validated_true++;
|
||||||
|
//out.format("Has validaiton data: %s\n", val_data.getLocation());
|
||||||
|
if (val_data != null) {
|
||||||
|
//out.format("Validated true: %s\n", val_data.getLocation());
|
||||||
|
calls_at_validated_sites++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.println(context.getLocation());
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> done() {
|
||||||
|
List<String> s = new ArrayList<String>();
|
||||||
|
s.add(String.format("validated sites %d", validated_sites));
|
||||||
|
s.add(String.format("calls at validated sites %d", calls_at_sites_validated_true));
|
||||||
|
s.add(String.format("calls at sites validated true %d", calls_at_validated_sites));
|
||||||
|
s.add(String.format("%% validated true %f", (float) calls_at_validated_sites / calls_at_sites_validated_true));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -26,7 +26,7 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=ReferenceOrderedDatum.class)}) // right now we have no base variant class for rods, this should change
|
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=ReferenceOrderedDatum.class)}) // right now we have no base variant class for rods, this should change
|
||||||
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=ReferenceOrderedDatum.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=ReferenceOrderedDatum.class), @RMD(name="interval",type=IntervalRod.class)})
|
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=ReferenceOrderedDatum.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=ReferenceOrderedDatum.class), @RMD(name="interval",type=IntervalRod.class), @RMD(name="validation",type=RodGenotypeChipAsGFF.class)})
|
||||||
public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
|
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
|
||||||
public int minConfidenceScore = -1;
|
public int minConfidenceScore = -1;
|
||||||
|
|
@ -115,6 +115,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
//
|
//
|
||||||
// Add new analyses here!
|
// Add new analyses here!
|
||||||
//
|
//
|
||||||
|
analyses.add(new ValidationDataAnalysis());
|
||||||
analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile));
|
analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile));
|
||||||
analyses.add(new VariantCounter());
|
analyses.add(new VariantCounter());
|
||||||
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue