cleaned up the scripts and created an interval library to facilitate future reuse.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5895 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0048f1f6d3
commit
260301016a
|
|
@ -0,0 +1,140 @@
|
|||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Creates a new interval table
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: Interval table
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
function newInterval(chr, start, finish, strand, info)
|
||||
return {chr= chr, start=tonumber(start), finish=tonumber(finish), strand=strand, info=info}
|
||||
end
|
||||
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Parses a line from an interval list file (not a header line!)
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: chromosome
|
||||
-- 2: interval start
|
||||
-- 3: interval end
|
||||
-- 4: strand (+/-)
|
||||
-- 5: info field
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function parseIntervalLine(l)
|
||||
return l:match("(%w+)%s+(%d+)%s+(%d+)%s+([%+%-])%s+(.*)")
|
||||
end
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Reads an interval list file into a table
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: table of intervals
|
||||
-- 2: header string
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function readIntervalList(filename)
|
||||
t = {}
|
||||
header = ""
|
||||
for l in io.lines(filename) do
|
||||
if l:sub(1,1) == "@" then header = header .. l .."\n"
|
||||
else
|
||||
local chr, start, finish, strand, info = parseIntervalLine(l)
|
||||
table.insert(t, newInterval(chr, start, finish, strand, info))
|
||||
end
|
||||
end
|
||||
return t, header
|
||||
end
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Checks if two intervals have the same chromosome, start and end.
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: true/false
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function isSameInterval (i1, i2)
|
||||
return i1.chr == i2.chr and i1.start == i2.start and i1.finish == i2.finish
|
||||
end
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Checks if the line from an interval list file is a header line
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: true/false
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function isIntervalHeaderLine(l)
|
||||
return l:sub(1,1) == "@"
|
||||
end
|
||||
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Compares the start position of two intervals
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: -1, 0 or 1 (respectively for a < b, a == b, a > b)
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function compIntervals(a, b)
|
||||
|
||||
local function c(x,y)
|
||||
if x < y then return -1
|
||||
elseif x > y then return 1
|
||||
else return 0 end
|
||||
end
|
||||
-- same chromosomes
|
||||
if a.chr == b.chr then return c(a.start, b.start)
|
||||
else
|
||||
x = tonumber(a.chr)
|
||||
y = tonumber(b.chr)
|
||||
if x and y then return c(x,y)
|
||||
else return c(a.chr, b.chr) end
|
||||
end
|
||||
end
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Compare function to sort a list of intervals (use with table.sort)
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: true if a < b, false otherwise.
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function sortCompInterval(a, b)
|
||||
if a.chr == b.chr then return a.start < b.start end
|
||||
local x = tonumber(a.chr)
|
||||
local y = tonumber(b.chr)
|
||||
if x and y then
|
||||
return x < y end
|
||||
return a.chr < b.chr
|
||||
end
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Checks if the interval is a valid human genome interval
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: true/false
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function isValidInterval(interval)
|
||||
local x
|
||||
if interval.chr == "X" then x = 23
|
||||
elseif interval.chr == "Y" then x = 24
|
||||
elseif interval.chr == "MT" then x = 25
|
||||
else x = tonumber(interval.chr) end
|
||||
return x >= 1 and x <= 25 and interval.start < interval.finish and chr_limits[x] > interval.finish
|
||||
end
|
||||
|
||||
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
-- Checks if the intervals are overlapping. Intervals are said to overlap if one of the following is true:
|
||||
-- i1i2: i1 starts before i2, but ends inside i2.
|
||||
-- i2i1: i2 starts before i1, but ends inside i1.
|
||||
-- i1_inside: i1 is fully contained inside i2.
|
||||
-- i2_inside: i2 is fully contained inside i1.
|
||||
--
|
||||
-- Return values:
|
||||
-- 1: true/false
|
||||
-- 2: if true, returns "i1i2", "i2i1", "i1_inside", "i2_inside"
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
local function isOverlappingInterval(i1, i2)
|
||||
if i1.chr ~= i2.chr then return false
|
||||
elseif i1.start < i2.start and i1.finish < i2.finish then return true, "i1i2"
|
||||
elseif i2.start < i1.start and i2.finish < i1.finish then return true, "i2i1"
|
||||
elseif i1.start > i2.start and i1.finish < i2.finish then return true, "i1_inside"
|
||||
elseif i2.start > i1.start and i2.finish < i1.finish then return true, "i2_inside"
|
||||
else return false end
|
||||
end
|
||||
|
|
@ -114,8 +114,6 @@ local function findInterval(i, intervals)
|
|||
local finish = table.getn(intervals)
|
||||
local current = math.floor((start + finish) / 2)
|
||||
|
||||
-- print("findInterval: ", start, finish, current, intervals[current].c, intervals[current].s, intervals[current].e)
|
||||
|
||||
while start < finish and not intervalContainsInterval(i, intervals[current]) and not isInterceptingInterval(i, intervals[current]) do
|
||||
if compIntervals(i, intervals[current]) < 0 then
|
||||
finish = current - 1
|
||||
|
|
@ -123,7 +121,6 @@ local function findInterval(i, intervals)
|
|||
start = current + 1
|
||||
end
|
||||
current = math.floor((start + finish) / 2)
|
||||
-- print("findInterval: ", start, finish, current, intervals[current].c, intervals[current].s, intervals[current].e)
|
||||
end
|
||||
return intervalContainsInterval(i, intervals[current]), current
|
||||
end
|
||||
|
|
@ -138,7 +135,6 @@ for l in io.lines(targetSet) do
|
|||
if not isIntervalHeaderLine(l) then
|
||||
local c, s, e, st, info = parseIntervalLine(l)
|
||||
local intA = newInterval(c,s,e,st,info)
|
||||
-- print("Debug: ", c,s,e,info)
|
||||
local intervalExists, i = findInterval(intA, a)
|
||||
if intervalExists then
|
||||
print(a[i].c, a[i].s, a[i].e, st, info)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
-- Global script variables
|
||||
-------------------------------------------------------------------------------
|
||||
local coverage = arg[1]
|
||||
local genesOfInterest= arg[2] or "/Volumes/humgen/gsa-hpprojects/dev/carneiro/top_genes/data/disease.genes.annotated.interval_list"
|
||||
local genesOfInterest= arg[2]
|
||||
|
||||
local genes = {}
|
||||
local header = ""
|
||||
|
|
@ -33,7 +33,6 @@ for l in io.lines(genesOfInterest) do
|
|||
if l:sub(1,1) == "@" then header = header .. l .."\n"
|
||||
else
|
||||
local c, s, e, info = l:match("(%w+)%s+(%d+)%s+(%d+)%s+%+%s+(.*)")
|
||||
-- print("Debug: adding gene: ",c,s,e,info)
|
||||
table.insert(genes, {c=c, s=tonumber(s), e=tonumber(e), info=info})
|
||||
end
|
||||
end
|
||||
|
|
@ -46,7 +45,6 @@ for l in io.lines(coverage) do
|
|||
s = tonumber(s)
|
||||
e = tonumber(e)
|
||||
while genes[i] ~= nil and (isSameGene(i, c, s, e) or isMergedGene(i, c, s, e)) do
|
||||
-- print("Debug: ", c, s, e, "--",genes[i].c, genes[i].s, genes[i].e)
|
||||
genes[i].totalCoverage = tonumber(totalCoverage)
|
||||
genes[i].avgCoverage = tonumber(avgCoverage)
|
||||
geneOk = true
|
||||
|
|
|
|||
Loading…
Reference in New Issue