Added playground walker that does a basic concordance check between two VCF files - an eval and a truth file - across all samples in the eval file. Produces per-sample, per-locus debug info and simple concordance stats. This is not meant to be extended, but rather used for validating the HapMap to VCF conversion in preparation for retiring GFF-based HapMap data.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2813 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2010-02-09 02:41:18 +00:00
parent 94f892ad42
commit 369cc50802
1 changed files with 73 additions and 0 deletions

View File

@ -0,0 +1,73 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
/**
 * Calculates concordance between two VCF files; used for testing conversion of HapMap data to VCF.
 *
 * <p>At every locus the walker looks up the SNP record bound to the ROD track named
 * {@code "eval"} and the one bound to {@code "truth"}. For each sample present in the eval
 * record that also has a genotype in the truth record, the two genotypes are compared, a
 * per-sample debug line is printed, and the running correct/incorrect tallies are updated.
 * Summary statistics are printed when the traversal finishes.
 */
@Requires(value={DataSource.REFERENCE})
public class VCFConcordance extends RodWalker<Integer, Integer> {
    /** Number of compared genotypes where eval agreed with truth. */
    int correct = 0;

    /** Number of compared genotypes where eval disagreed with truth. */
    int incorrect = 0;

    /**
     * Compares eval and truth genotypes at the current locus.
     *
     * @param tracker reference-ordered data at this locus; may be null
     * @param ref     reference context at this locus (unused)
     * @param context alignment context, used only for its location
     * @return 0 when the eval or truth record is unavailable at this locus, 1 otherwise
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null) {
            return 1;
        }

        final EnumSet<VariantContext.Type> snpOnly = EnumSet.of(VariantContext.Type.SNP);
        final GenomeLoc loc = context.getLocation();

        final VariantContext eval;
        final VariantContext truth;
        try {
            eval = tracker.getVariantContext("eval", snpOnly, loc, true);
            truth = tracker.getVariantContext("truth", snpOnly, loc, true);
        } catch (java.util.NoSuchElementException e) {
            // One of the tracks has no record here, so there is nothing to compare.
            return 0;
        }

        // Explicit check rather than an assert: assertions are disabled unless the JVM is
        // started with -ea, and the eval record was previously dereferenced without any check.
        if (eval == null || truth == null) {
            return 0;
        }

        for (String sampleName : eval.getGenotypes().keySet()) {
            final Genotype evalGenotype = eval.getGenotype(sampleName);

            // Skip samples missing from truth and samples that were not called in eval.
            // The no-call test must be made on the sample's genotype; comparing the site's
            // allele collection against Allele.NO_CALL is always unequal and filters nothing.
            if (!truth.hasGenotype(sampleName) || evalGenotype.isNoCall()) {
                continue;
            }

            final Genotype truthGenotype = truth.getGenotype(sampleName);
            final boolean concordant = evalGenotype.sameGenotype(truthGenotype);
            if (concordant) {
                correct++;
            } else {
                incorrect++;
            }
            out.printf("%s %s %s %s %s\n", concordant ? "==" : "<>", loc, sampleName, evalGenotype, truthGenotype);
        }
        return 1;
    }

    /**
     * @return the initial value of the reduce accumulator, zero
     */
    public Integer reduceInit() {
        return 0;
    }

    /**
     * Adds the result of one {@link #map} call to the running sum.
     *
     * @param value result of the latest map call
     * @param sum   accumulated sum so far
     * @return the updated sum
     */
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /**
     * Prints the correct and incorrect genotype counts and the concordance percentage.
     *
     * @param value final reduce result (unused)
     */
    public void onTraversalDone(Integer value) {
        final int compared = correct + incorrect;
        out.format("Correct: %d\n", correct);
        out.format("Incorrect: %d\n", incorrect);
        // Floating-point division: when nothing was compared this evaluates to NaN
        // instead of throwing an ArithmeticException.
        out.format("Concordance: %.1f\n", (float) correct / compared * 100);
    }
}