Simple program that calls indels in CEU trio exomes and WGS can compared the results. Overall the indel calls really look good to me, given reasonably good input BAM files.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6006 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-06-16 12:56:04 +00:00
parent 43fdd31e20
commit 27d4b317fc
2 changed files with 114 additions and 1 deletions

View File

@ -15,7 +15,7 @@ if ( HAVE_RAW_DATA ) {
+ geom_abline(slope=1, linetype=2))
# + geom_smooth(se=T, size=1.5, aes(weight=Sum)))
} else {
eByComp = read.table("~/Dropbox/Analysis/genotypeAccuracy/NA12878.hm3.vcf.cgl.table.eByComp.tsv", header=T)
eByComp = read.table("~/Dropbox/GSA members/Analysis/genotypeAccuracy/NA12878.hm3.vcf.cgl.table.eByComp.tsv", header=T)
}
#print(subset(countsByTech, pGGivenD > 18 & pGGivenD < 22 & pGGivenDType == "QofABGivenD"))

View File

@ -0,0 +1,113 @@
package oneoffs.depristo
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
class IndelCallerEvaluation extends QScript {
val BUNDLE = "/humgen/gsa-hpprojects/GATK/bundle/current"
@Argument(doc="gatkJarFile", required=false)
var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
@Argument(shortName = "R", doc="ref", required=false)
var referenceFile: File = new File(BUNDLE + "/b37/human_g1k_v37.fasta")
@Argument(shortName = "bam", doc="BAM", required=true)
val bams: List[File] = null;
@Argument(shortName = "intervals", doc="intervals", required=false)
val myIntervals: String = null;
@Argument(shortName = "dcov", doc="dcov", required=false)
val DCOV: Int = 250;
val dbSNP: File = new File(BUNDLE + "/b37/dbsnp_132.b37.vcf")
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
this.logging_level = "INFO";
this.jarFile = gatkJarFile;
this.reference_sequence = referenceFile;
this.memoryLimit = 4
if ( intervals != null )
this.intervalsString = List(myIntervals);
}
trait CoFoJa extends JavaCommandLineFunction {
override def javaOpts = super.javaOpts // + " -javaagent:lib/cofoja.jar"
}
def processOne(bam: File, gsaProduction: Boolean): File = {
val rawVCF = new Call(bam, gsaProduction)
add(rawVCF)
val filterIndels = new FilterIndels(rawVCF.out)
add(filterIndels)
// create a variant eval for us
add(new Eval(filterIndels.out))
return filterIndels.out
}
def script = {
for ( gsaProduction <- List(true, false)) {
val vcfs = bams.map(processOne(_, gsaProduction))
val combineCalls = new CombineVariants with UNIVERSAL_GATK_ARGS
for ( vcf <- vcfs )
combineCalls.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
combineCalls.filteredrecordsmergetype = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED
combineCalls.out = "combined" + productionString(gsaProduction) + ".vcf"
add(combineCalls)
add(new ToTable(combineCalls.out))
}
}
class FilterIndels(@Input vcf: File) extends VariantFiltration with UNIVERSAL_GATK_ARGS {
this.variantVCF = vcf
this.filterName = List("Indel_QUAL", "Indel_SB", "Indel_QD")
this.filterExpression = List("\"QUAL<30.0\"", "\"SB>-1.0\"", "\"QD<2.0\"")
this.out = swapExt(vcf,".vcf",".filtered.vcf")
}
class ToTable(@Input vcf: File) extends VariantsToTable with UNIVERSAL_GATK_ARGS {
this.rodBind :+= RodBind("variant", "VCF", vcf)
this.fields = List("FILTER", "set")
this.out = swapExt(vcf,".vcf",".table")
this.raw = true
}
class Eval(@Input vcf: File) extends VariantEval with UNIVERSAL_GATK_ARGS {
this.rodBind :+= RodBind("eval", "VCF", vcf)
if ( dbSNP.exists() )
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
this.doNotUseAllStandardStratifications = true
this.doNotUseAllStandardModules = true
this.evalModule = List("CountVariants", "IndelStatistics", "CompOverlap")
this.stratificationModule = List("EvalRod", "CompRod", "Novelty", "Filter", "JexlExpression")
this.out = swapExt(vcf,".vcf",".eval")
}
def productionString(gsaProduction: Boolean): String = {
return if ( gsaProduction ) ".prod" else ".expt"
}
class Call(@Input(doc="foo") bam: File, gsaProduction: Boolean) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS {
@Output(doc="foo") var outVCF: File = swapExt(bam,".bam", productionString(gsaProduction) + ".indels.vcf")
this.input_file = List(bam)
this.stand_call_conf = 50.0
this.stand_emit_conf = 50.0
this.dcov = DCOV;
this.o = outVCF
this.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.INDEL
this.GSA_PRODUCTION_ONLY = gsaProduction
if ( dbSNP.exists() )
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
}
}