Added playground walker that does a basic concordance check between two VCF files - an eval and a truth file - across all samples in the eval file. Produces per-sample, per-locus debug info and simple concordance stats. This is not meant to be extended, but rather used for validating the HapMap to VCF conversion in preparation for retiring GFF-based HapMap data.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2813 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
94f892ad42
commit
369cc50802
|
|
@ -0,0 +1,73 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
import java.util.EnumSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates concordance between two VCF files; used for testing conversion of HapMap data to VCF
|
||||||
|
*/
|
||||||
|
@Requires(value={DataSource.REFERENCE})
|
||||||
|
public class VCFConcordance extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
int correct = 0;
|
||||||
|
int incorrect = 0;
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if (tracker != null) {
|
||||||
|
EnumSet<VariantContext.Type> vc = EnumSet.of(VariantContext.Type.SNP);
|
||||||
|
GenomeLoc loc = context.getLocation();
|
||||||
|
VariantContext eval = null;
|
||||||
|
VariantContext truth = null;
|
||||||
|
try {
|
||||||
|
eval = tracker.getVariantContext("eval", vc, loc, true);
|
||||||
|
truth = tracker.getVariantContext("truth", vc, loc, true);
|
||||||
|
} catch (java.util.NoSuchElementException e) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
assert(truth != null);
|
||||||
|
for (String eval_samplename : eval.getGenotypes().keySet()) {
|
||||||
|
Genotype eval_genotype = eval.getGenotype(eval_samplename);
|
||||||
|
if (truth.hasGenotype(eval_samplename) && eval.getAlleles() != Allele.NO_CALL) {
|
||||||
|
Genotype truth_genotype = truth.getGenotype(eval_samplename);
|
||||||
|
if (eval_genotype.sameGenotype(truth_genotype)) {
|
||||||
|
out.printf("== ");
|
||||||
|
correct++;
|
||||||
|
}else{
|
||||||
|
out.printf("<> ");
|
||||||
|
incorrect++;
|
||||||
|
}
|
||||||
|
out.printf("%s %s %s %s\n", loc, eval_samplename, eval_genotype, truth_genotype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return sum + value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer value) {
|
||||||
|
out.format("Correct: %d\n", correct);
|
||||||
|
out.format("Incorrect: %d\n", incorrect);
|
||||||
|
out.format("Concordance: %.1f\n", (float)correct/(correct+incorrect)*100);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue