merge intervals now prints a sorted list in the end.

added the ccs datasets to the pbCalling pipeline.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5233 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
carneiro 2011-02-11 20:57:59 +00:00
parent 50c2fa3c3a
commit 5f10fffa47
2 changed files with 29 additions and 10 deletions

View File

@ -9,9 +9,17 @@
assert(table.getn(arg) > 0, "\n\nMissing input file\n\nUsage:\n\tlua MergeIntervals.lua raw.intervals > merged.intervals\n\n")
local function comp(a, b)
if tonumber(a) and tonumber(b) then return tonumber(a) < tonumber(b) end
return a<b
end
-- read file and create chromosome tables with raw intervals marked as "x"
local intervals = {}
local intervalKeys = {}
local sortedChrs = {}
for l in io.lines(arg[1]) do
local chr, a, b = l:match("([%a%d]+):(%d+)-(%d+)")
a,b = tonumber(a), tonumber(b)
@ -20,6 +28,7 @@ for l in io.lines(arg[1]) do
intervals[chr].min = a
intervals[chr].max = b
intervalKeys[chr] = {}
table.insert(sortedChrs, chr)
end
for i=a,b do intervals[chr][i] = "x" end
intervals[chr].min = math.min(intervals[chr].min, a)
@ -51,9 +60,10 @@ for c, chrTable in pairs(intervals) do
end
end
for c,v in pairs(intervalsIndex) do
table.sort(v)
for _,intervalStart in ipairs(v) do
table.sort(sortedChrs, comp)
for _,c in pairs(sortedChrs) do
table.sort(intervalsIndex[c])
for _,intervalStart in ipairs(intervalsIndex[c]) do
if intervalSize[c][intervalStart] > 0 then
print(c..":"..intervalStart.."-"..intervalStart + intervalSize[c][intervalStart])
end

View File

@ -1,3 +1,4 @@
import org.broadinstitute.sting.gatk.CommandLineGATK
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
@ -110,16 +111,24 @@ class pbCalling extends QScript {
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, !lowPass),
"pacbio" -> new Target("pacbio", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/pacbio.recal.bam"),
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/amplicon/pacbio.filtered.vcf"),
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/pacbio.filtered.vcf"),
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pacbio.hg19.intervals", 1.8, !lowPass),
"pb200" -> new Target("pb200", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb200.recal.bam"),
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/amplicon/pb200.filtered.vcf"),
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/pb200.filtered.vcf"),
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb200.hg19.intervals", 1.8, !lowPass),
"pb2k" -> new Target("pb2k", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb2k.recal.bam"),
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/amplicon/pb2k.filtered.vcf"),
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb2k.hg19.intervals", 1.8, !lowPass)
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/pb2k.filtered.vcf"),
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb2k.hg19.intervals", 1.8, !lowPass),
"cc200" -> new Target("cc200", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc200.recal.bam"),
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/cc200.filtered.vcf"),
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc200.hg19.intervals", 1.8, !lowPass),
"cc2k" -> new Target("cc2k", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc2k.recal.bam"),
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/cc2k.filtered.vcf"),
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc2k.hg19.intervals", 1.8, !lowPass)
)
@ -245,7 +254,7 @@ class pbCalling extends QScript {
this.tranchesFile = t.tsTranchesFile
}
// 5.) Variant Cut (OPTIONAL!) filter out the variants marked by recalibration to the 99% tranche
// 5.) Variant Cut filter out the variants marked by recalibration to the 99% tranche
class VariantCut(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.ApplyVariantCuts with UNIVERSAL_GATK_ARGS {
this.reference_sequence = t.reference
this.rodBind :+= RodBind("input", "VCF", t.tsRecalibratedVCF )
@ -260,8 +269,8 @@ class pbCalling extends QScript {
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
}
// 6.) Variant Evaluation (OPTIONAL!) based on the sensitivity recalibrated vcf
class VariantEvaluation(t: Target) extends VariantEval with UNIVERSAL_GATK_ARGS {
// 6.) Variant Evaluation based on the sensitivity recalibrated vcf
class VariantEvaluation(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.VariantEval with UNIVERSAL_GATK_ARGS {
val name: String = t.name
this.reference_sequence = t.reference
this.rodBind :+= RodBind("comphapmap", "VCF", t.hapmapFile)