Minor usability improvements to the standard eval script.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5551 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-04-01 17:36:50 +00:00
parent 0927b7c297
commit f2c4356a40
1 changed file with 38 additions and 28 deletions

View File

@ -21,6 +21,9 @@ class StandardVariantEvaluation extends QScript {
@Argument(shortName = "dataDir", doc="Path to the standard evaluation data files", required=false) @Argument(shortName = "dataDir", doc="Path to the standard evaluation data files", required=false)
val DATA_DIR = "/humgen/gsa-hpprojects/GATK/data/Comparisons/StandardForEvaluation/b37/" val DATA_DIR = "/humgen/gsa-hpprojects/GATK/data/Comparisons/StandardForEvaluation/b37/"
@Argument(shortName = "evalStandard1000GCalls", doc="If provided, we'll include some standard 1000G data for evaluation", required=false)
val EVAL_STANDARD_1000G_CALLS: Boolean = false
val COMPS_DIR = DATA_DIR + "/comps/" val COMPS_DIR = DATA_DIR + "/comps/"
val EVALS_DIR = DATA_DIR + "/evals/" val EVALS_DIR = DATA_DIR + "/evals/"
@ -77,13 +80,17 @@ class StandardVariantEvaluation extends QScript {
addComp(new Comp("NA12878.homvar.CG", "indels", "NA12878.CG.b37.indels.vcf", true)) addComp(new Comp("NA12878.homvar.CG", "indels", "NA12878.CG.b37.indels.vcf", true))
addComp(new Comp("g1k.pilot1.validation", "indels", "pilot1_indel_validation_2009.b37.vcf")) addComp(new Comp("g1k.pilot1.validation", "indels", "pilot1_indel_validation_2009.b37.vcf"))
addComp(new Comp("NA12878.hand_curated", "indels", "NA12878.validated.curated.polymorphic.indels.vcf")) addComp(new Comp("NA12878.hand_curated", "indels", "NA12878.validated.curated.polymorphic.indels.vcf"))
addComp(new Comp("NA12878.Mullikin", "indels", "NA12878.DIPline.NQScm.expanded.chr20.b37.minReads_2_or_gt2bp.vcf"))
// //
// INDEL call sets // INDEL call sets
// //
if ( EVAL_STANDARD_1000G_CALLS ) {
addEval(new Eval("dindel", "indels", "20110208.chr20.dindel2.EUR.sites.vcf")) addEval(new Eval("dindel", "indels", "20110208.chr20.dindel2.EUR.sites.vcf"))
addEval(new Eval("si", "indels", "20101123.chr20.si.v2.EUR.sites.vcf")) addEval(new Eval("si", "indels", "20101123.chr20.si.v2.EUR.sites.vcf"))
addEval(new Eval("gatk", "indels", "EUR.phase1.chr20.broad.filtered.indels.sites.vcf")) addEval(new Eval("gatk", "indels", "EUR.phase1.chr20.broad.filtered.indels.sites.vcf"))
}
// //
// Standard evaluation files for SNPs // Standard evaluation files for SNPs
@ -98,10 +105,10 @@ class StandardVariantEvaluation extends QScript {
// //
// SNP call sets // SNP call sets
// //
if ( EVAL_STANDARD_1000G_CALLS ) {
addEval(new Eval("1000G.gatk.eurPlus.phase1", "snps", "EUR+.phase1.chr20.broad.recal.vrcut1p0.sites.vcf")) addEval(new Eval("1000G.gatk.eurPlus.phase1", "snps", "EUR+.phase1.chr20.broad.recal.vrcut1p0.sites.vcf"))
addEval(new Eval("1000G.high_specificity.phase1", "snps", "ALL.phase1.chr20.projectConsensus.highSpecificity.snps.genotypes.sites.vcf")) addEval(new Eval("1000G.high_specificity.phase1", "snps", "ALL.phase1.chr20.projectConsensus.highSpecificity.snps.genotypes.sites.vcf"))
// todo -- are there other good call sets for evaluation? }
// todo -- add hg19 na12878 64x
} }
def script = { def script = {
@ -131,6 +138,8 @@ class StandardVariantEvaluation extends QScript {
var evalsOfType = EVALS.filter(_.evalType == evalType) var evalsOfType = EVALS.filter(_.evalType == evalType)
val compsOfType = COMPS.filter(_.evalType == evalType) val compsOfType = COMPS.filter(_.evalType == evalType)
if ( evalsOfType.size > 0 ) {
// if desired and possible, create a union.X.vcf file // if desired and possible, create a union.X.vcf file
if ( CREATE_UNION && evalsOfType.size > 1 ) { if ( CREATE_UNION && evalsOfType.size > 1 ) {
val union: File = new File("union.%s.vcf".format(evalType)) val union: File = new File("union.%s.vcf".format(evalType))
@ -155,6 +164,7 @@ class StandardVariantEvaluation extends QScript {
add(VE) add(VE)
} }
} }
}
/** /**
* Select homozygous non-reference sites from a single deep data set * Select homozygous non-reference sites from a single deep data set