merge intervals now prints a sorted list in the end.
added the ccs datasets to the pbCalling pipeline. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5233 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
50c2fa3c3a
commit
5f10fffa47
|
|
@ -9,9 +9,17 @@
|
|||
|
||||
assert(table.getn(arg) > 0, "\n\nMissing input file\n\nUsage:\n\tlua MergeIntervals.lua raw.intervals > merged.intervals\n\n")
|
||||
|
||||
|
||||
local function comp(a, b)
|
||||
if tonumber(a) and tonumber(b) then return tonumber(a) < tonumber(b) end
|
||||
return a<b
|
||||
end
|
||||
|
||||
|
||||
-- read file and create chromosome tables with raw intervals marked as "x"
|
||||
local intervals = {}
|
||||
local intervalKeys = {}
|
||||
local sortedChrs = {}
|
||||
for l in io.lines(arg[1]) do
|
||||
local chr, a, b = l:match("([%a%d]+):(%d+)-(%d+)")
|
||||
a,b = tonumber(a), tonumber(b)
|
||||
|
|
@ -20,6 +28,7 @@ for l in io.lines(arg[1]) do
|
|||
intervals[chr].min = a
|
||||
intervals[chr].max = b
|
||||
intervalKeys[chr] = {}
|
||||
table.insert(sortedChrs, chr)
|
||||
end
|
||||
for i=a,b do intervals[chr][i] = "x" end
|
||||
intervals[chr].min = math.min(intervals[chr].min, a)
|
||||
|
|
@ -51,9 +60,10 @@ for c, chrTable in pairs(intervals) do
|
|||
end
|
||||
end
|
||||
|
||||
for c,v in pairs(intervalsIndex) do
|
||||
table.sort(v)
|
||||
for _,intervalStart in ipairs(v) do
|
||||
table.sort(sortedChrs, comp)
|
||||
for _,c in pairs(sortedChrs) do
|
||||
table.sort(intervalsIndex[c])
|
||||
for _,intervalStart in ipairs(intervalsIndex[c]) do
|
||||
if intervalSize[c][intervalStart] > 0 then
|
||||
print(c..":"..intervalStart.."-"..intervalStart + intervalSize[c][intervalStart])
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
|
||||
|
|
@ -110,16 +111,24 @@ class pbCalling extends QScript {
|
|||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, !lowPass),
|
||||
"pacbio" -> new Target("pacbio", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/pacbio.recal.bam"),
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/amplicon/pacbio.filtered.vcf"),
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/pacbio.filtered.vcf"),
|
||||
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pacbio.hg19.intervals", 1.8, !lowPass),
|
||||
"pb200" -> new Target("pb200", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb200.recal.bam"),
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/amplicon/pb200.filtered.vcf"),
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/pb200.filtered.vcf"),
|
||||
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb200.hg19.intervals", 1.8, !lowPass),
|
||||
"pb2k" -> new Target("pb2k", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb2k.recal.bam"),
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/amplicon/pb2k.filtered.vcf"),
|
||||
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb2k.hg19.intervals", 1.8, !lowPass)
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/pb2k.filtered.vcf"),
|
||||
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/pb2k.hg19.intervals", 1.8, !lowPass),
|
||||
"cc200" -> new Target("cc200", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc200.recal.bam"),
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/cc200.filtered.vcf"),
|
||||
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc200.hg19.intervals", 1.8, !lowPass),
|
||||
"cc2k" -> new Target("cc2k", b37, dbSNP_b37_129, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc2k.recal.bam"),
|
||||
new File("/humgen/gsa-scr1/carneiro/prj/pacbio/analisys/snps/cc2k.filtered.vcf"),
|
||||
"/humgen/gsa-scr1/carneiro/prj/pacbio/data/cc2k.hg19.intervals", 1.8, !lowPass)
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -245,7 +254,7 @@ class pbCalling extends QScript {
|
|||
this.tranchesFile = t.tsTranchesFile
|
||||
}
|
||||
|
||||
// 5.) Variant Cut (OPTIONAL!) filter out the variants marked by recalibration to the 99% tranche
|
||||
// 5.) Variant Cut filter out the variants marked by recalibration to the 99% tranche
|
||||
class VariantCut(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.ApplyVariantCuts with UNIVERSAL_GATK_ARGS {
|
||||
this.reference_sequence = t.reference
|
||||
this.rodBind :+= RodBind("input", "VCF", t.tsRecalibratedVCF )
|
||||
|
|
@ -260,8 +269,8 @@ class pbCalling extends QScript {
|
|||
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
|
||||
}
|
||||
|
||||
// 6.) Variant Evaluation (OPTIONAL!) based on the sensitivity recalibrated vcf
|
||||
class VariantEvaluation(t: Target) extends VariantEval with UNIVERSAL_GATK_ARGS {
|
||||
// 6.) Variant Evaluation based on the sensitivity recalibrated vcf
|
||||
class VariantEvaluation(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.VariantEval with UNIVERSAL_GATK_ARGS {
|
||||
val name: String = t.name
|
||||
this.reference_sequence = t.reference
|
||||
this.rodBind :+= RodBind("comphapmap", "VCF", t.hapmapFile)
|
||||
|
|
|
|||
Loading…
Reference in New Issue