PPV and Sensitivity added to validation tool output; support for arbitrary -sample arguments to subset variant contexts by sample
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2978 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
40d305bc7e
commit
4f4555c80f
|
|
@ -606,6 +606,14 @@ public class VariantContext {
|
|||
*/
|
||||
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
||||
|
||||
public boolean hasGenotypes(Collection<String> sampleNames) {
|
||||
for ( String name : sampleNames ) {
|
||||
if ( ! genotypes.containsKey(name) )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return set of all Genotypes associated with this context
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -20,10 +20,15 @@ import java.util.Arrays;
|
|||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class ValidationRate extends VariantEvaluator {
|
||||
|
||||
// todo -- subset validation data by list of samples, if provided
|
||||
|
||||
// todo -- print out PPV and sensitivity numbers
|
||||
|
||||
class SiteStats {
|
||||
long nPoly = 0, nMono = 0, nNoCall = 0;
|
||||
|
||||
double polyPercent() { return rate(nPoly, nPoly + nMono + nNoCall); }
|
||||
double polyPercent() { return 100 * rate(nPoly, nPoly + nMono + nNoCall); }
|
||||
}
|
||||
|
||||
private SiteStats validationStats = new SiteStats();
|
||||
|
|
@ -47,16 +52,22 @@ public class ValidationRate extends VariantEvaluator {
|
|||
}
|
||||
|
||||
private String summaryLine() {
|
||||
return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f",
|
||||
long TP = evalOverlapAtPoly.nPoly + evalOverlapAtMono.nMono;
|
||||
long FP = evalOverlapAtMono.nPoly + evalOverlapAtPoly.nMono;
|
||||
long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall;
|
||||
|
||||
return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f %.2f %.2f",
|
||||
validationStats.nMono, validationStats.nPoly, validationStats.polyPercent(),
|
||||
evalOverlapAtMono.nMono, evalOverlapAtMono.nPoly, evalOverlapAtMono.nNoCall, evalOverlapAtMono.polyPercent(),
|
||||
evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent());
|
||||
evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent(),
|
||||
100 * rate(TP, TP + FP), 100 * rate(TP, TP + FN));
|
||||
}
|
||||
|
||||
private static List<String> HEADER =
|
||||
Arrays.asList("n_mono_in_comp", "n_poly_in_comp", "percent_poly_in_comp",
|
||||
"n_mono_calls_at_mono_sites", "n_poly_calls_at_mono_sites", "n_nocalls_at_mono_sites", "percent_mono_sites_called_poly",
|
||||
"n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly");
|
||||
"n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly",
|
||||
"PPV", "Sensitivity");
|
||||
|
||||
// making it a table
|
||||
public List<String> getTableHeader() {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
|
|
@ -110,6 +111,10 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
||||
protected String[] KNOWN_NAMES = {"dbsnp"};
|
||||
|
||||
@Argument(shortName="sample", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false)
|
||||
protected String[] SAMPLES = {};
|
||||
private List<String> SAMPLES_LIST = null;
|
||||
|
||||
//
|
||||
// Arguments for Mendelian Violation calculations
|
||||
//
|
||||
|
|
@ -213,6 +218,8 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public void initialize() {
|
||||
SAMPLES_LIST = Arrays.asList(SAMPLES);
|
||||
|
||||
determineAllEvalations();
|
||||
List<VariantContextUtils.JexlVCMatchExp> selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS);
|
||||
|
||||
|
|
@ -494,6 +501,13 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
throw new StingException("Found multiple variant contexts at " + context.getLocation());
|
||||
|
||||
VariantContext vc = contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||
|
||||
if ( vc != null && vc.hasGenotypes(SAMPLES_LIST) ) {
|
||||
//if ( ! name.equals("eval") ) logger.info(String.format("subsetting VC %s", vc));
|
||||
vc = vc.subContextFromGenotypes(vc.getGenotypes(SAMPLES_LIST).values());
|
||||
//if ( ! name.equals("eval") ) logger.info(String.format(" => VC %s", vc));
|
||||
}
|
||||
|
||||
map.put(name, allowExcludes && excludeComp(vc) ? null : vc);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVE2Simple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000", "d83605861576db9bc0d50d5c11b67a90");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "be922212c7cb8f8070158dab86949c4b");
|
||||
expectations.put("-L 1:1-10,000,000", "32b2e9758078b66e6d50d140acb37947");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "5ee420ebf7c2d3c2e3827c0114a6706d");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
|
|
@ -41,10 +41,10 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
|
||||
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
String eqMD5s = "f03913dacc5e9938a0aa00f1e5a031de"; // next two examples should be the same!
|
||||
String eqMD5s = "ba021a4c963200191710a220a5577753"; // next two examples should be the same!
|
||||
expectations.put("", eqMD5s);
|
||||
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
|
||||
expectations.put(" -known comp_hapmap", "0f886e7042c3999c3d87f848e0b58eb8");
|
||||
expectations.put(" -known comp_hapmap", "5ce16165f4242d77b4e82c704273c11d");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
|
|
@ -62,7 +62,7 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
||||
2,
|
||||
Arrays.asList("8162dcf2539e8241fb27cba2055631bf", "a3ce1d70d8ae3874807e9d61994d42af"));
|
||||
Arrays.asList("0b29285da3ca778b9c8b7f62e99aa72d", "d41d8cd98f00b204e9800998ecf8427e"));
|
||||
executeTest("testVE2WriteVCF", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue