diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java index 2903e28c6..e26e08bac 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java @@ -606,6 +606,14 @@ public class VariantContext { */ public boolean hasGenotypes() { return genotypes.size() > 0; } + public boolean hasGenotypes(Collection sampleNames) { + for ( String name : sampleNames ) { + if ( ! genotypes.containsKey(name) ) + return false; + } + return true; + } + /** * @return set of all Genotypes associated with this context */ diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java index 93fb68ed2..66272b3d1 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java @@ -20,10 +20,15 @@ import java.util.Arrays; * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ public class ValidationRate extends VariantEvaluator { + + // todo -- subset validation data by list of samples, if provided + + // todo -- print out PPV and sensitivity numbers + class SiteStats { long nPoly = 0, nMono = 0, nNoCall = 0; - double polyPercent() { return rate(nPoly, nPoly + nMono + nNoCall); } + double polyPercent() { return 100 * rate(nPoly, nPoly + nMono + nNoCall); } } private SiteStats validationStats = new SiteStats(); @@ -47,16 +52,22 @@ public class ValidationRate extends VariantEvaluator { } private String summaryLine() { - return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f", + long TP = evalOverlapAtPoly.nPoly + evalOverlapAtMono.nMono; + long FP = evalOverlapAtMono.nPoly + evalOverlapAtPoly.nMono; + long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall; + + return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f %.2f %.2f", validationStats.nMono, validationStats.nPoly, validationStats.polyPercent(), evalOverlapAtMono.nMono, evalOverlapAtMono.nPoly, evalOverlapAtMono.nNoCall, evalOverlapAtMono.polyPercent(), - evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent()); + evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent(), + 100 * rate(TP, TP + FP), 100 * rate(TP, TP + FN)); } private static List HEADER = Arrays.asList("n_mono_in_comp", "n_poly_in_comp", "percent_poly_in_comp", "n_mono_calls_at_mono_sites", "n_poly_calls_at_mono_sites", "n_nocalls_at_mono_sites", "percent_mono_sites_called_poly", - "n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly"); + "n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly", + "PPV", "Sensitivity"); // making it a table public List getTableHeader() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java index 8023a3413..15409b348 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; @@ -110,6 +111,10 @@ public class VariantEval2Walker extends RodWalker { @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) protected String[] KNOWN_NAMES = {"dbsnp"}; + @Argument(shortName="sample", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false) + protected String[] SAMPLES = {}; + private List SAMPLES_LIST = null; + // // Arguments for Mendelian Violation calculations // @@ -213,6 +218,8 @@ public class VariantEval2Walker extends RodWalker { // -------------------------------------------------------------------------------------------------------------- public void initialize() { + SAMPLES_LIST = Arrays.asList(SAMPLES); + determineAllEvalations(); List selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS); @@ -494,6 +501,13 @@ public class VariantEval2Walker extends RodWalker { throw new StingException("Found multiple variant contexts at " + context.getLocation()); VariantContext vc = contexts.size() == 1 ? contexts.iterator().next() : null; + + if ( vc != null && vc.hasGenotypes(SAMPLES_LIST) ) { + //if ( ! name.equals("eval") ) logger.info(String.format("subsetting VC %s", vc)); + vc = vc.subContextFromGenotypes(vc.getGenotypes(SAMPLES_LIST).values()); + //if ( ! name.equals("eval") ) logger.info(String.format(" => VC %s", vc)); + } + map.put(name, allowExcludes && excludeComp(vc) ? null : vc); } } diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java index 159a6c6ff..d6b6a7c2b 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java @@ -20,8 +20,8 @@ public class VariantEval2IntegrationTest extends WalkerTest { @Test public void testVE2Simple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "d83605861576db9bc0d50d5c11b67a90"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "be922212c7cb8f8070158dab86949c4b"); + expectations.put("-L 1:1-10,000,000", "32b2e9758078b66e6d50d140acb37947"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "5ee420ebf7c2d3c2e3827c0114a6706d"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -41,10 +41,10 @@ public class VariantEval2IntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "f03913dacc5e9938a0aa00f1e5a031de"; // next two examples should be the same! + String eqMD5s = "ba021a4c963200191710a220a5577753"; // next two examples should be the same! expectations.put("", eqMD5s); expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "0f886e7042c3999c3d87f848e0b58eb8"); + expectations.put(" -known comp_hapmap", "5ce16165f4242d77b4e82c704273c11d"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -62,7 +62,7 @@ public class VariantEval2IntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("8162dcf2539e8241fb27cba2055631bf", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("0b29285da3ca778b9c8b7f62e99aa72d", "d41d8cd98f00b204e9800998ecf8427e")); executeTest("testVE2WriteVCF", spec); } }