Simple program that calls indels in CEU trio exomes and WGS can compared the results. Overall the indel calls really look good to me, given reasonably good input BAM files.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6006 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
43fdd31e20
commit
27d4b317fc
|
|
@ -15,7 +15,7 @@ if ( HAVE_RAW_DATA ) {
|
|||
+ geom_abline(slope=1, linetype=2))
|
||||
# + geom_smooth(se=T, size=1.5, aes(weight=Sum)))
|
||||
} else {
|
||||
eByComp = read.table("~/Dropbox/Analysis/genotypeAccuracy/NA12878.hm3.vcf.cgl.table.eByComp.tsv", header=T)
|
||||
eByComp = read.table("~/Dropbox/GSA members/Analysis/genotypeAccuracy/NA12878.hm3.vcf.cgl.table.eByComp.tsv", header=T)
|
||||
}
|
||||
|
||||
#print(subset(countsByTech, pGGivenD > 18 & pGGivenD < 22 & pGGivenDType == "QofABGivenD"))
|
||||
|
|
|
|||
|
|
@ -0,0 +1,113 @@
|
|||
package oneoffs.depristo
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
|
||||
|
||||
class IndelCallerEvaluation extends QScript {
|
||||
val BUNDLE = "/humgen/gsa-hpprojects/GATK/bundle/current"
|
||||
|
||||
@Argument(doc="gatkJarFile", required=false)
|
||||
var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
|
||||
|
||||
@Argument(shortName = "R", doc="ref", required=false)
|
||||
var referenceFile: File = new File(BUNDLE + "/b37/human_g1k_v37.fasta")
|
||||
|
||||
@Argument(shortName = "bam", doc="BAM", required=true)
|
||||
val bams: List[File] = null;
|
||||
|
||||
@Argument(shortName = "intervals", doc="intervals", required=false)
|
||||
val myIntervals: String = null;
|
||||
|
||||
@Argument(shortName = "dcov", doc="dcov", required=false)
|
||||
val DCOV: Int = 250;
|
||||
|
||||
val dbSNP: File = new File(BUNDLE + "/b37/dbsnp_132.b37.vcf")
|
||||
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||
this.logging_level = "INFO";
|
||||
this.jarFile = gatkJarFile;
|
||||
this.reference_sequence = referenceFile;
|
||||
this.memoryLimit = 4
|
||||
|
||||
if ( intervals != null )
|
||||
this.intervalsString = List(myIntervals);
|
||||
}
|
||||
|
||||
trait CoFoJa extends JavaCommandLineFunction {
|
||||
override def javaOpts = super.javaOpts // + " -javaagent:lib/cofoja.jar"
|
||||
}
|
||||
|
||||
def processOne(bam: File, gsaProduction: Boolean): File = {
|
||||
val rawVCF = new Call(bam, gsaProduction)
|
||||
add(rawVCF)
|
||||
|
||||
val filterIndels = new FilterIndels(rawVCF.out)
|
||||
add(filterIndels)
|
||||
|
||||
// create a variant eval for us
|
||||
add(new Eval(filterIndels.out))
|
||||
return filterIndels.out
|
||||
}
|
||||
|
||||
def script = {
|
||||
for ( gsaProduction <- List(true, false)) {
|
||||
val vcfs = bams.map(processOne(_, gsaProduction))
|
||||
|
||||
val combineCalls = new CombineVariants with UNIVERSAL_GATK_ARGS
|
||||
for ( vcf <- vcfs )
|
||||
combineCalls.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
|
||||
|
||||
combineCalls.filteredrecordsmergetype = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED
|
||||
combineCalls.out = "combined" + productionString(gsaProduction) + ".vcf"
|
||||
add(combineCalls)
|
||||
|
||||
add(new ToTable(combineCalls.out))
|
||||
}
|
||||
}
|
||||
|
||||
class FilterIndels(@Input vcf: File) extends VariantFiltration with UNIVERSAL_GATK_ARGS {
|
||||
this.variantVCF = vcf
|
||||
this.filterName = List("Indel_QUAL", "Indel_SB", "Indel_QD")
|
||||
this.filterExpression = List("\"QUAL<30.0\"", "\"SB>-1.0\"", "\"QD<2.0\"")
|
||||
this.out = swapExt(vcf,".vcf",".filtered.vcf")
|
||||
}
|
||||
|
||||
class ToTable(@Input vcf: File) extends VariantsToTable with UNIVERSAL_GATK_ARGS {
|
||||
this.rodBind :+= RodBind("variant", "VCF", vcf)
|
||||
this.fields = List("FILTER", "set")
|
||||
this.out = swapExt(vcf,".vcf",".table")
|
||||
this.raw = true
|
||||
}
|
||||
|
||||
class Eval(@Input vcf: File) extends VariantEval with UNIVERSAL_GATK_ARGS {
|
||||
this.rodBind :+= RodBind("eval", "VCF", vcf)
|
||||
if ( dbSNP.exists() )
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.doNotUseAllStandardStratifications = true
|
||||
this.doNotUseAllStandardModules = true
|
||||
this.evalModule = List("CountVariants", "IndelStatistics", "CompOverlap")
|
||||
this.stratificationModule = List("EvalRod", "CompRod", "Novelty", "Filter", "JexlExpression")
|
||||
this.out = swapExt(vcf,".vcf",".eval")
|
||||
}
|
||||
|
||||
def productionString(gsaProduction: Boolean): String = {
|
||||
return if ( gsaProduction ) ".prod" else ".expt"
|
||||
}
|
||||
|
||||
class Call(@Input(doc="foo") bam: File, gsaProduction: Boolean) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS {
|
||||
@Output(doc="foo") var outVCF: File = swapExt(bam,".bam", productionString(gsaProduction) + ".indels.vcf")
|
||||
this.input_file = List(bam)
|
||||
this.stand_call_conf = 50.0
|
||||
this.stand_emit_conf = 50.0
|
||||
this.dcov = DCOV;
|
||||
this.o = outVCF
|
||||
|
||||
this.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.INDEL
|
||||
this.GSA_PRODUCTION_ONLY = gsaProduction
|
||||
|
||||
if ( dbSNP.exists() )
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue