PPV and Sensitivity added to validation tool output; support for arbitrary -sample arguments to subset variant contexts by sample
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2978 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
40d305bc7e
commit
4f4555c80f
|
|
@ -606,6 +606,14 @@ public class VariantContext {
|
||||||
*/
|
*/
|
||||||
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
||||||
|
|
||||||
|
public boolean hasGenotypes(Collection<String> sampleNames) {
|
||||||
|
for ( String name : sampleNames ) {
|
||||||
|
if ( ! genotypes.containsKey(name) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return set of all Genotypes associated with this context
|
* @return set of all Genotypes associated with this context
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -20,10 +20,15 @@ import java.util.Arrays;
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*/
|
*/
|
||||||
public class ValidationRate extends VariantEvaluator {
|
public class ValidationRate extends VariantEvaluator {
|
||||||
|
|
||||||
|
// todo -- subset validation data by list of samples, if provided
|
||||||
|
|
||||||
|
// todo -- print out PPV and sensitivity numbers
|
||||||
|
|
||||||
class SiteStats {
|
class SiteStats {
|
||||||
long nPoly = 0, nMono = 0, nNoCall = 0;
|
long nPoly = 0, nMono = 0, nNoCall = 0;
|
||||||
|
|
||||||
double polyPercent() { return rate(nPoly, nPoly + nMono + nNoCall); }
|
double polyPercent() { return 100 * rate(nPoly, nPoly + nMono + nNoCall); }
|
||||||
}
|
}
|
||||||
|
|
||||||
private SiteStats validationStats = new SiteStats();
|
private SiteStats validationStats = new SiteStats();
|
||||||
|
|
@ -47,16 +52,22 @@ public class ValidationRate extends VariantEvaluator {
|
||||||
}
|
}
|
||||||
|
|
||||||
private String summaryLine() {
|
private String summaryLine() {
|
||||||
return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f",
|
long TP = evalOverlapAtPoly.nPoly + evalOverlapAtMono.nMono;
|
||||||
|
long FP = evalOverlapAtMono.nPoly + evalOverlapAtPoly.nMono;
|
||||||
|
long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall;
|
||||||
|
|
||||||
|
return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f %.2f %.2f",
|
||||||
validationStats.nMono, validationStats.nPoly, validationStats.polyPercent(),
|
validationStats.nMono, validationStats.nPoly, validationStats.polyPercent(),
|
||||||
evalOverlapAtMono.nMono, evalOverlapAtMono.nPoly, evalOverlapAtMono.nNoCall, evalOverlapAtMono.polyPercent(),
|
evalOverlapAtMono.nMono, evalOverlapAtMono.nPoly, evalOverlapAtMono.nNoCall, evalOverlapAtMono.polyPercent(),
|
||||||
evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent());
|
evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent(),
|
||||||
|
100 * rate(TP, TP + FP), 100 * rate(TP, TP + FN));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<String> HEADER =
|
private static List<String> HEADER =
|
||||||
Arrays.asList("n_mono_in_comp", "n_poly_in_comp", "percent_poly_in_comp",
|
Arrays.asList("n_mono_in_comp", "n_poly_in_comp", "percent_poly_in_comp",
|
||||||
"n_mono_calls_at_mono_sites", "n_poly_calls_at_mono_sites", "n_nocalls_at_mono_sites", "percent_mono_sites_called_poly",
|
"n_mono_calls_at_mono_sites", "n_poly_calls_at_mono_sites", "n_nocalls_at_mono_sites", "percent_mono_sites_called_poly",
|
||||||
"n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly");
|
"n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly",
|
||||||
|
"PPV", "Sensitivity");
|
||||||
|
|
||||||
// making it a table
|
// making it a table
|
||||||
public List<String> getTableHeader() {
|
public List<String> getTableHeader() {
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||||
|
|
@ -110,6 +111,10 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
||||||
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
||||||
protected String[] KNOWN_NAMES = {"dbsnp"};
|
protected String[] KNOWN_NAMES = {"dbsnp"};
|
||||||
|
|
||||||
|
@Argument(shortName="sample", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false)
|
||||||
|
protected String[] SAMPLES = {};
|
||||||
|
private List<String> SAMPLES_LIST = null;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Arguments for Mendelian Violation calculations
|
// Arguments for Mendelian Violation calculations
|
||||||
//
|
//
|
||||||
|
|
@ -213,6 +218,8 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
SAMPLES_LIST = Arrays.asList(SAMPLES);
|
||||||
|
|
||||||
determineAllEvalations();
|
determineAllEvalations();
|
||||||
List<VariantContextUtils.JexlVCMatchExp> selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS);
|
List<VariantContextUtils.JexlVCMatchExp> selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS);
|
||||||
|
|
||||||
|
|
@ -494,6 +501,13 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
||||||
throw new StingException("Found multiple variant contexts at " + context.getLocation());
|
throw new StingException("Found multiple variant contexts at " + context.getLocation());
|
||||||
|
|
||||||
VariantContext vc = contexts.size() == 1 ? contexts.iterator().next() : null;
|
VariantContext vc = contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||||
|
|
||||||
|
// Restrict the context to the genotypes of the requested samples.
// The isEmpty() guard matters: SAMPLES defaults to an empty array, and
// hasGenotypes() answers true for an empty list, so without the guard every
// context would be subset by an empty sample list even when no -sample
// argument was given (presumably leaving it without genotypes -- confirm
// against getGenotypes(Collection)).
if ( vc != null && ! SAMPLES_LIST.isEmpty() && vc.hasGenotypes(SAMPLES_LIST) ) {
    vc = vc.subContextFromGenotypes(vc.getGenotypes(SAMPLES_LIST).values());
}
|
||||||
|
|
||||||
map.put(name, allowExcludes && excludeComp(vc) ? null : vc);
|
map.put(name, allowExcludes && excludeComp(vc) ? null : vc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,8 +20,8 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testVE2Simple() {
|
public void testVE2Simple() {
|
||||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||||
expectations.put("-L 1:1-10,000,000", "d83605861576db9bc0d50d5c11b67a90");
|
expectations.put("-L 1:1-10,000,000", "32b2e9758078b66e6d50d140acb37947");
|
||||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "be922212c7cb8f8070158dab86949c4b");
|
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "5ee420ebf7c2d3c2e3827c0114a6706d");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||||
String extraArgs = entry.getKey();
|
String extraArgs = entry.getKey();
|
||||||
|
|
@ -41,10 +41,10 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
||||||
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
|
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
|
||||||
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||||
|
|
||||||
String eqMD5s = "f03913dacc5e9938a0aa00f1e5a031de"; // next two examples should be the same!
|
String eqMD5s = "ba021a4c963200191710a220a5577753"; // next two examples should be the same!
|
||||||
expectations.put("", eqMD5s);
|
expectations.put("", eqMD5s);
|
||||||
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
|
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
|
||||||
expectations.put(" -known comp_hapmap", "0f886e7042c3999c3d87f848e0b58eb8");
|
expectations.put(" -known comp_hapmap", "5ce16165f4242d77b4e82c704273c11d");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||||
String extraArgs2 = entry.getKey();
|
String extraArgs2 = entry.getKey();
|
||||||
|
|
@ -62,7 +62,7 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
||||||
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
||||||
2,
|
2,
|
||||||
Arrays.asList("8162dcf2539e8241fb27cba2055631bf", "a3ce1d70d8ae3874807e9d61994d42af"));
|
Arrays.asList("0b29285da3ca778b9c8b7f62e99aa72d", "d41d8cd98f00b204e9800998ecf8427e"));
|
||||||
executeTest("testVE2WriteVCF", spec);
|
executeTest("testVE2WriteVCF", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue