Generic, easy-to-use variant evaluation Queue script that tests indel and SNP call sets against standard evaluation data sets for sensitivity and specificity
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5391 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dc33fbed7c
commit
bf2e02f472
|
|
@ -0,0 +1,191 @@
|
||||||
|
package core
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.queue.QScript
|
||||||
|
import org.broadinstitute.sting.queue.extensions.gatk.RodBind
|
||||||
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
|
|
||||||
|
class StandardVariantEvaluation extends QScript {
|
||||||
|
// todo -- update to released version when things stabilize
|
||||||
|
@Argument(doc="gatkJarFile", required=false)
|
||||||
|
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKFromLaptop/trunk/dist/GenomeAnalysisTK.jar")
|
||||||
|
|
||||||
|
@Argument(shortName = "R", doc="B37 reference sequence: defaults to broad standard location", required=false)
|
||||||
|
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
||||||
|
|
||||||
|
@Argument(shortName = "intervals", doc="intervals to evaluate. Only supports evaluation on chromosome 20 now, as most evaluation data is there", required=false)
|
||||||
|
val TARGET_INTERVAL: String = "20"
|
||||||
|
|
||||||
|
@Argument(shortName = "includeUnion", doc="If provided, we'll create a union of the evaluation data sets for evaluation", required=false)
|
||||||
|
val CREATE_UNION: Boolean = false
|
||||||
|
|
||||||
|
@Argument(shortName = "dataDir", doc="Path to the standard evaluation data files", required=false)
|
||||||
|
val DATA_DIR = "/humgen/gsa-hpprojects/GATK/data/Comparisons/StandardForEvaluation/b37/"
|
||||||
|
|
||||||
|
val COMPS_DIR = DATA_DIR + "/comps/"
|
||||||
|
val EVALS_DIR = DATA_DIR + "/evals/"
|
||||||
|
|
||||||
|
@Argument(shortName = "moreSNPsToEval", doc="Path to additional SNP call sets for evaluation", required=false)
|
||||||
|
val moreSNPsToEval: List[File] = Nil
|
||||||
|
|
||||||
|
@Argument(shortName = "moreIndelsToEval", doc="Path to additional Indel call sets for evaluation", required=false)
|
||||||
|
val moreIndelsToEval: List[File] = Nil
|
||||||
|
|
||||||
|
val VARIANT_TYPES: List[String] = List("indels", "snps")
|
||||||
|
val VARIANT_TYPE_VT: Map[String, List[org.broad.tribble.util.variantcontext.VariantContext.Type]] = Map(
|
||||||
|
"indels" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.INDEL, org.broad.tribble.util.variantcontext.VariantContext.Type.MIXED, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION),
|
||||||
|
"snps" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.SNP, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION)
|
||||||
|
)
|
||||||
|
|
||||||
|
val SITES_DIR: String = "sitesFiles"
|
||||||
|
|
||||||
|
// path to b37 DBSNP
|
||||||
|
@Argument(shortName = "dbsnp", doc="Path to DBSNP **VCF** for evaluation", required=false)
|
||||||
|
val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b37.leftAligned.vcf")
|
||||||
|
//val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf");
|
||||||
|
|
||||||
|
class Comp(val name: String, val evalType: String, val filename: String, val MakeHomVar: Boolean = false) {
|
||||||
|
val originalFile = new File(COMPS_DIR + filename)
|
||||||
|
val file: File = if ( MakeHomVar ) swapExt(originalFile, ".vcf",".homvar.vcf") else originalFile
|
||||||
|
val sitesFile = new File(SITES_DIR + "/" + swapExt(file, ".vcf", ".sites.vcf").getName)
|
||||||
|
}
|
||||||
|
|
||||||
|
class Eval(val name: String, val evalType: String, val filename: String, val overrideFile: File = null ) {
|
||||||
|
val file: File = if ( overrideFile != null ) overrideFile else new File(EVALS_DIR + "/" + filename)
|
||||||
|
}
|
||||||
|
|
||||||
|
var COMPS: List[Comp] = Nil
|
||||||
|
def addComp(comp: Comp) { COMPS = comp :: COMPS }
|
||||||
|
|
||||||
|
var EVALS: List[Eval] = Nil
|
||||||
|
def addEval(eval: Eval) { EVALS = eval :: EVALS }
|
||||||
|
def addEvalFromCMD(file: File, t: String) { addEval(new Eval(file.getName, t, null, file)) }
|
||||||
|
|
||||||
|
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||||
|
this.logging_level = "INFO";
|
||||||
|
this.jarFile = gatkJarFile;
|
||||||
|
this.intervalsString = List(TARGET_INTERVAL);
|
||||||
|
this.reference_sequence = referenceFile;
|
||||||
|
this.memoryLimit = Some(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
def initializeStandardDataFiles() = {
|
||||||
|
//
|
||||||
|
// Standard evaluation files for indels
|
||||||
|
//
|
||||||
|
addComp(new Comp("NA12878.homvar.GATK", "indels", "Indels.NA12878_WGS.filtered_Q50.0_QD5.0_SB-1.0_HR18.vcf", true))
|
||||||
|
addComp(new Comp("CG.38samples", "indels", "CG.Indels.leftAligned.b37.vcf"))
|
||||||
|
addComp(new Comp("NA12878.homvar.CG", "indels", "NA12878.CG.b37.indels.vcf", true))
|
||||||
|
addComp(new Comp("g1k.pilot1.validation", "indels", "pilot1_indel_validation_2009.b37.vcf"))
|
||||||
|
addComp(new Comp("NA12878.hand_curated", "indels", "NA12878.validated.curated.polymorphic.indels.vcf"))
|
||||||
|
|
||||||
|
//
|
||||||
|
// INDEL call sets
|
||||||
|
//
|
||||||
|
addEval(new Eval("dindel", "indels", "20110208.chr20.dindel2.EUR.sites.vcf"))
|
||||||
|
addEval(new Eval("si", "indels", "20101123.chr20.si.v2.EUR.sites.vcf"))
|
||||||
|
addEval(new Eval("gatk", "indels", "EUR.phase1.chr20.broad.filtered.indels.sites.vcf"))
|
||||||
|
|
||||||
|
//
|
||||||
|
// Standard evaluation files for SNPs
|
||||||
|
//
|
||||||
|
addComp(new Comp("NA12878.homvar.GATK", "snps", "NA12878.HiSeq19.cut.vcf", true))
|
||||||
|
addComp(new Comp("CG.38samples", "snps", "CG.38samples.b37.vcf"))
|
||||||
|
addComp(new Comp("NA12878.homvar.CG", "snps", "NA12878.CG.b37.snps.vcf", true))
|
||||||
|
addComp(new Comp("HapMap3.3", "snps", "hapmap3.3.sites_r27_nr.b37_fwd.vcf"))
|
||||||
|
addComp(new Comp("OMNI.2.5M", "snps", "omni2.5.1212samples.b37.sites.chr20.monoAreAC0.vcf"))
|
||||||
|
addComp(new Comp("g1k.pilot1.validation", "snps", "1000G.snp.validation.b37.vcf"))
|
||||||
|
|
||||||
|
//
|
||||||
|
// SNP call sets
|
||||||
|
//
|
||||||
|
addEval(new Eval("gatk", "snps", "EUR+.phase1.chr20.broad.recal.vrcut1p0.sites.vcf"))
|
||||||
|
// todo -- are there other good call sets for evaluation?
|
||||||
|
// todo -- add hg19 na12878 64x
|
||||||
|
}
|
||||||
|
|
||||||
|
def script = {
|
||||||
|
val sitesDir = new File(SITES_DIR)
|
||||||
|
if ( ! sitesDir.exists ) sitesDir.mkdirs()
|
||||||
|
|
||||||
|
initializeStandardDataFiles();
|
||||||
|
|
||||||
|
// add additional files for evaluation, if necessary
|
||||||
|
moreSNPsToEval.foreach(addEvalFromCMD(_, "snps"))
|
||||||
|
moreIndelsToEval.foreach(addEvalFromCMD(_, "indels"))
|
||||||
|
|
||||||
|
//
|
||||||
|
// create hom-var versions of key files
|
||||||
|
//
|
||||||
|
for ( comp <- COMPS )
|
||||||
|
if ( comp.MakeHomVar )
|
||||||
|
add(new SelectHomVars(comp.originalFile, comp.file))
|
||||||
|
|
||||||
|
for ( comp <- COMPS )
|
||||||
|
add(new JustSites(comp.file, comp.sitesFile))
|
||||||
|
|
||||||
|
//
|
||||||
|
// Loop over evaluation types
|
||||||
|
//
|
||||||
|
for ( evalType <- VARIANT_TYPES ) {
|
||||||
|
var evalsOfType = EVALS.filter(_.evalType == evalType)
|
||||||
|
val compsOfType = COMPS.filter(_.evalType == evalType)
|
||||||
|
|
||||||
|
// if desired and possible, create a union.X.vcf file
|
||||||
|
if ( CREATE_UNION && evalsOfType.size > 1 ) {
|
||||||
|
val union: File = new File("union.%s.vcf".format(evalType))
|
||||||
|
add(new MyCombine(evalsOfType.map(_.file), union));
|
||||||
|
evalsOfType = new Eval("union", evalType, null, union) :: evalsOfType
|
||||||
|
}
|
||||||
|
|
||||||
|
// our root VE
|
||||||
|
val VE = new MyEval()
|
||||||
|
VE.VT = VARIANT_TYPE_VT(evalType)
|
||||||
|
VE.o = new File(evalType + ".eval")
|
||||||
|
|
||||||
|
// add evals
|
||||||
|
for ( calls <- evalsOfType )
|
||||||
|
VE.rodBind :+= RodBind("eval_" + calls.name, "VCF", calls.file)
|
||||||
|
|
||||||
|
// add comps
|
||||||
|
//VE.rodBind :+= RodBind("dbsnp", "VCF", MY_DBSNP)
|
||||||
|
for ( comp <- compsOfType )
|
||||||
|
VE.rodBind :+= RodBind("comp_" + comp.name, "VCF", comp.sitesFile)
|
||||||
|
|
||||||
|
add(VE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Select homozygous non-reference sites from a single deep data set
|
||||||
|
*/
|
||||||
|
class SelectHomVars(@Input(doc="foo") vcf: File, @Output(doc="foo") out: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
|
||||||
|
this.rodBind :+= RodBind("variant", "VCF", vcf)
|
||||||
|
this.o = out
|
||||||
|
this.select ++= List("\"AC == 2\"")
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple union
|
||||||
|
*/
|
||||||
|
class MyCombine(@Input(doc="foo") vcfs: List[File], @Output(doc="foo") out: File) extends CombineVariants with UNIVERSAL_GATK_ARGS {
|
||||||
|
for ( vcf <- vcfs )
|
||||||
|
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
|
||||||
|
this.o = out
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A command line (cut) that removes all genotyping information from a file
|
||||||
|
*/
|
||||||
|
class JustSites(@Input(doc="foo") in: File, @Output(doc="foo") out: File) extends CommandLineFunction {
|
||||||
|
def commandLine = "cut -f 1-8 %s > %s".format(in, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for VariantEval used here
|
||||||
|
*/
|
||||||
|
class MyEval() extends VariantEval with UNIVERSAL_GATK_ARGS {
|
||||||
|
this.noST = true
|
||||||
|
this.evalModule :+= "ValidationReport"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -1,147 +0,0 @@
|
||||||
package oneoffs.depristo
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.queue.QScript
|
|
||||||
import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction
|
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk.RodBind
|
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
|
||||||
|
|
||||||
class StandardVariantEvalation extends QScript {
|
|
||||||
@Argument(doc="gatkJarFile", required=false)
|
|
||||||
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKFromLaptop/trunk/dist/GenomeAnalysisTK.jar")
|
|
||||||
|
|
||||||
@Argument(shortName = "R", doc="ref", required=false)
|
|
||||||
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
|
||||||
|
|
||||||
@Argument(shortName = "intervals", doc="intervals", required=false)
|
|
||||||
val TARGET_INTERVAL: String = "20";
|
|
||||||
|
|
||||||
val DATA_DIR = "data/" // todo -- make into an argument
|
|
||||||
|
|
||||||
val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf");
|
|
||||||
|
|
||||||
val JOB_QUEUE = "hour"
|
|
||||||
|
|
||||||
// todo -- add arguments to include other files for SNPs and indels
|
|
||||||
|
|
||||||
val VARIANT_TYPES: List[String] = List("indels", "snps")
|
|
||||||
val VARIANT_TYPE_VT: Map[String, List[org.broad.tribble.util.variantcontext.VariantContext.Type]] = Map(
|
|
||||||
"indels" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.INDEL, org.broad.tribble.util.variantcontext.VariantContext.Type.MIXED, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION),
|
|
||||||
"snps" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.SNP, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION)
|
|
||||||
)
|
|
||||||
|
|
||||||
class Comp(val name: String, val evalType: String, val filename: String, val MakeHomVar: Boolean = false) {
|
|
||||||
val originalFile = new File(DATA_DIR + filename)
|
|
||||||
val file: File = if ( MakeHomVar ) swapExt(originalFile, ".vcf",".homvar.vcf") else originalFile
|
|
||||||
val sitesFile = swapExt(file, ".vcf", ".sites.vcf")
|
|
||||||
}
|
|
||||||
|
|
||||||
class Eval(val name: String, val evalType: String, val file: File) {}
|
|
||||||
|
|
||||||
var COMPS: List[Comp] = Nil
|
|
||||||
def addComp(comp: Comp) { COMPS = comp :: COMPS }
|
|
||||||
|
|
||||||
var EVALS: List[Eval] = Nil
|
|
||||||
def addEval(eval: Eval) { EVALS = eval :: EVALS }
|
|
||||||
|
|
||||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
|
||||||
this.logging_level = "INFO";
|
|
||||||
this.jarFile = gatkJarFile;
|
|
||||||
this.intervalsString = List(TARGET_INTERVAL);
|
|
||||||
this.reference_sequence = referenceFile;
|
|
||||||
this.jobQueue = JOB_QUEUE;
|
|
||||||
this.memoryLimit = Some(2)
|
|
||||||
}
|
|
||||||
|
|
||||||
def script = {
|
|
||||||
//
|
|
||||||
// Standard evaluation files for indels
|
|
||||||
//
|
|
||||||
addComp(new Comp("homVarNA12878GATK", "indels", "Indels.NA12878_WGS.filtered_Q50.0_QD5.0_SB-1.0_HR18.vcf", true))
|
|
||||||
addComp(new Comp("CG38", "indels", "CG.Indels.leftAligned.b37.vcf"))
|
|
||||||
addComp(new Comp("homVarNA12878CG", "indels", "NA12878.CG.b37.indels.vcf", true))
|
|
||||||
addComp(new Comp("Pilot1Validation", "indels", "pilot1_indel_validation_2009.b37.vcf"))
|
|
||||||
addComp(new Comp("Pilot1Validation", "indels", "NA12878.validated.curated.polymorphic.indels.vcf"))
|
|
||||||
|
|
||||||
//
|
|
||||||
// INDEL call sets
|
|
||||||
//
|
|
||||||
addEval(new Eval("dindel", "indels", new File("20110208.chr20.dindel2.EUR.sites.vcf")))
|
|
||||||
addEval(new Eval("si", "indels", new File("20101123.chr20.si.v2.EUR.sites.vcf")))
|
|
||||||
addEval(new Eval("gatk", "indels", new File("EUR.phase1.chr20.broad.filtered.indels.sites.vcf")))
|
|
||||||
|
|
||||||
//
|
|
||||||
// Standard evaluation files for SNPs
|
|
||||||
//
|
|
||||||
addComp(new Comp("homVarNA12878GATK", "snps", "NA12878.HiSeq19.cut.vcf", true))
|
|
||||||
addComp(new Comp("CG38", "snps", "CG.38samples.b37.vcf"))
|
|
||||||
addComp(new Comp("homVarNA12878CG", "snps", "NA12878.CG.b37.snps.vcf", true))
|
|
||||||
|
|
||||||
//
|
|
||||||
// SNP call sets
|
|
||||||
//
|
|
||||||
addEval(new Eval("gatk", "snps", new File("EUR+.phase1.chr20.broad.recal.vrcut1p0.sites.vcf")))
|
|
||||||
|
|
||||||
//
|
|
||||||
// create hom-var versions of key files
|
|
||||||
//
|
|
||||||
for ( comp <- COMPS )
|
|
||||||
if ( comp.MakeHomVar )
|
|
||||||
add(new SelectHomVars(comp.originalFile, comp.file))
|
|
||||||
|
|
||||||
for ( comp <- COMPS )
|
|
||||||
add(new JustSites(comp.file, comp.sitesFile))
|
|
||||||
|
|
||||||
//
|
|
||||||
// Loop over evaluation types
|
|
||||||
//
|
|
||||||
for ( evalType <- VARIANT_TYPES ) {
|
|
||||||
var evalsOfType = EVALS.filter(_.evalType == evalType)
|
|
||||||
val compsOfType = COMPS.filter(_.evalType == evalType)
|
|
||||||
|
|
||||||
if ( evalsOfType.size > 1 ) {
|
|
||||||
val union: File = new File("union.%s.vcf".format(evalType))
|
|
||||||
add(new MyCombine(evalsOfType.map(_.file), union));
|
|
||||||
evalsOfType = new Eval("union", evalType, union) :: evalsOfType
|
|
||||||
}
|
|
||||||
|
|
||||||
val VE = new MyEval()
|
|
||||||
VE.VT = VARIANT_TYPE_VT(evalType)
|
|
||||||
VE.o = new File(evalType + ".eval")
|
|
||||||
|
|
||||||
// add evals
|
|
||||||
for ( calls <- evalsOfType )
|
|
||||||
VE.rodBind :+= RodBind("eval_" + calls.name, "VCF", calls.file)
|
|
||||||
|
|
||||||
// add comps
|
|
||||||
//VE.rodBind :+= RodBind("dbsnp", "VCF", MY_DBSNP)
|
|
||||||
for ( comp <- compsOfType )
|
|
||||||
VE.rodBind :+= RodBind("comp_" + comp.name, "VCF", comp.sitesFile)
|
|
||||||
|
|
||||||
add(VE)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class SelectHomVars(@Input(doc="foo") vcf: File, @Output(doc="foo") out: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
|
|
||||||
this.rodBind :+= RodBind("variant", "VCF", vcf)
|
|
||||||
this.o = out
|
|
||||||
this.select ++= List("\"AC == 2\"")
|
|
||||||
}
|
|
||||||
|
|
||||||
class MyCombine(@Input(doc="foo") vcfs: List[File], @Output(doc="foo") out: File) extends CombineVariants with UNIVERSAL_GATK_ARGS {
|
|
||||||
for ( vcf <- vcfs )
|
|
||||||
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
|
|
||||||
this.o = out
|
|
||||||
}
|
|
||||||
|
|
||||||
class JustSites(@Input(doc="foo") in: File, @Output(doc="foo") out: File) extends CommandLineFunction {
|
|
||||||
def commandLine = "cut -f 1-8 %s > %s".format(in, out)
|
|
||||||
this.jobQueue = JOB_QUEUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
class MyEval() extends VariantEval with UNIVERSAL_GATK_ARGS {
|
|
||||||
this.noST = true
|
|
||||||
this.evalModule :+= "ValidationReport"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -9,8 +9,18 @@ class ManySampleUGPerformanceTesting extends QScript {
|
||||||
@Argument(shortName = "R", doc="ref", required=false)
|
@Argument(shortName = "R", doc="ref", required=false)
|
||||||
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
||||||
|
|
||||||
val TARGET_INTERVAL = "my.intervals"
|
@Argument(shortName = "bams", doc="BAMs", required=true)
|
||||||
val FULL_BAM_LIST = new File("allPopulations_phase1_release.no_solid.list")
|
val FULL_BAM_LIST: File = null;
|
||||||
|
|
||||||
|
@Argument(shortName = "intervals", doc="intervals", required=true)
|
||||||
|
val TARGET_INTERVAL: String = null;
|
||||||
|
|
||||||
|
@Argument(shortName = "preMerge", doc="preMerge", required=false)
|
||||||
|
val PRE_MERGE: Boolean = false;
|
||||||
|
|
||||||
|
@Argument(shortName = "dcov", doc="dcov", required=false)
|
||||||
|
val DCOV: Int = 50;
|
||||||
|
|
||||||
val MERGED_DIR = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/manySampleUGPerformance/")
|
val MERGED_DIR = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/manySampleUGPerformance/")
|
||||||
|
|
||||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||||
|
|
@ -24,26 +34,37 @@ class ManySampleUGPerformanceTesting extends QScript {
|
||||||
}
|
}
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
for (nSamples <- List(1, 2, 5, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900)) {
|
for (nSamples <- List(1, 2, 4, 8, 16, 32, 60)) {
|
||||||
|
// for (nSamples <- List(1, 2, 5, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900)) {
|
||||||
// for (nSamples <- List(10)) {
|
// for (nSamples <- List(10)) {
|
||||||
val sublist = new SliceList(nSamples)
|
val sublist = new SliceList(nSamples)
|
||||||
val mergeSublist = new MergeBAMs(sublist.list)
|
val mergeSublist = new MergeBAMs(sublist.list)
|
||||||
|
|
||||||
|
val name: String = if ( PRE_MERGE ) "pre_merge" else "dynamic_merge"
|
||||||
|
val bams: File = if ( PRE_MERGE ) mergeSublist.o else sublist.list
|
||||||
|
|
||||||
add(sublist)
|
add(sublist)
|
||||||
add(mergeSublist)
|
if ( PRE_MERGE ) {
|
||||||
add(new Index(mergeSublist.o) )
|
add(mergeSublist)
|
||||||
|
add(new Index(mergeSublist.o) )
|
||||||
|
}
|
||||||
|
|
||||||
// SNP calling
|
// SNP calling
|
||||||
//add(new Call(sublist.list, nSamples, "dynamic_merge"))
|
//add(new Call(sublist.list, nSamples, "dynamic_merge"))
|
||||||
add(new Call(mergeSublist.o, nSamples, "pre_merge"))
|
val gt = new Call(bams, nSamples, name);
|
||||||
|
gt.exactCalculation = Some(org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel.ExactCalculation.N2_GOLD_STANDARD)
|
||||||
|
add(gt)
|
||||||
|
|
||||||
|
val gtLinear = new Call(bams, nSamples, name + "_linear");
|
||||||
|
gtLinear.exactCalculation = Some(org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL)
|
||||||
|
add(gtLinear)
|
||||||
|
|
||||||
// SNP calling -- no annotations
|
// SNP calling -- no annotations
|
||||||
//add(new Call(sublist.list, nSamples, "dynamic_merge_no_annotations") { this.G :+= "None"; })
|
//add(new Call(bams.list, nSamples, "dynamic_merge_no_annotations") { this.G :+= "None"; })
|
||||||
add(new Call(mergeSublist.o, nSamples, "pre_merge_no_annotations") { this.G :+= "none"; })
|
|
||||||
|
|
||||||
// CountLoci
|
// CountLoci
|
||||||
//add(new MyCountLoci(sublist.list, nSamples, "dynamic_merge"))
|
//add(new MyCountLoci(sublist.list, nSamples, "dynamic_merge"))
|
||||||
add(new MyCountLoci(mergeSublist.o, nSamples, "pre_merge"))
|
add(new MyCountLoci(bams, nSamples, name))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,20 +84,21 @@ class ManySampleUGPerformanceTesting extends QScript {
|
||||||
@Output(doc="foo") var outVCF: File = new File("%s.%d.%s.vcf".format(bamList.getName, n, name))
|
@Output(doc="foo") var outVCF: File = new File("%s.%d.%s.vcf".format(bamList.getName, n, name))
|
||||||
this.input_file :+= bamList
|
this.input_file :+= bamList
|
||||||
this.stand_call_conf = Option(10.0)
|
this.stand_call_conf = Option(10.0)
|
||||||
this.dcov = Option(50);
|
this.dcov = Option(DCOV);
|
||||||
this.o = outVCF
|
this.o = outVCF
|
||||||
}
|
}
|
||||||
|
|
||||||
class MyCountLoci(@Input(doc="foo") bamList: File, n: Int, name: String) extends CountLoci with UNIVERSAL_GATK_ARGS {
|
class MyCountLoci(@Input(doc="foo") bamList: File, n: Int, name: String) extends CountLoci with UNIVERSAL_GATK_ARGS {
|
||||||
@Output(doc="foo") var outFile: File = new File("%s.%d.%s.txt".format(bamList.getName, n, name))
|
@Output(doc="foo") var outFile: File = new File("%s.%d.%s.txt".format(bamList.getName, n, name))
|
||||||
this.input_file :+= bamList
|
this.input_file :+= bamList
|
||||||
this.dcov = Option(50);
|
this.dcov = Option(DCOV);
|
||||||
this.o = outFile
|
this.o = outFile
|
||||||
}
|
}
|
||||||
|
|
||||||
class SliceList(n: Int) extends CommandLineFunction {
|
class SliceList(n: Int) extends CommandLineFunction {
|
||||||
@Output(doc="foo") var list: File = new File("bams.%d.list".format(n))
|
@Output(doc="foo") var list: File = new File("bams.%d.list".format(n))
|
||||||
def commandLine = "head -n %d %s > %s".format(n, FULL_BAM_LIST, list)
|
def commandLine = "head -n %d %s > %s".format(n, FULL_BAM_LIST, list)
|
||||||
|
this.jobQueue = "gsa";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue