diff --git a/lua/IntervalUtils.lua b/lua/IntervalUtils.lua new file mode 100644 index 000000000..dc377d7f7 --- /dev/null +++ b/lua/IntervalUtils.lua @@ -0,0 +1,140 @@ +------------------------------------------------------------------------------------------------------------------------ +-- Creates a new interval table +-- +-- Return values: +-- 1: Interval table +------------------------------------------------------------------------------------------------------------------------ + +function newInterval(chr, start, finish, strand, info) + return {chr= chr, start=tonumber(start), finish=tonumber(finish), strand=strand, info=info} +end + + +------------------------------------------------------------------------------------------------------------------------ +-- Parses a line from an interval list file (not a header line!) +-- +-- Return values: +-- 1: chromosome +-- 2: interval start +-- 3: interval end +-- 4: strand (+/-) +-- 5: info field +------------------------------------------------------------------------------------------------------------------------ +local function parseIntervalLine(l) + return l:match("(%w+)%s+(%d+)%s+(%d+)%s+([%+%-])%s+(.*)") +end + +------------------------------------------------------------------------------------------------------------------------ +-- Reads an interval list file into a table +-- +-- Return values: +-- 1: table of intervals +-- 2: header string +------------------------------------------------------------------------------------------------------------------------ +local function readIntervalList(filename) + t = {} + header = "" + for l in io.lines(filename) do + if l:sub(1,1) == "@" then header = header .. l .."\n" + else + local chr, start, finish, strand, info = parseIntervalLine(l) + table.insert(t, newInterval(chr, start, finish, strand, info)) + end + end + return t, header +end + +------------------------------------------------------------------------------------------------------------------------ +-- Checks if two intervals have the same chromosome, start and end. +-- +-- Return values: +-- 1: true/false +------------------------------------------------------------------------------------------------------------------------ +local function isSameInterval (i1, i2) + return i1.chr == i2.chr and i1.start == i2.start and i1.finish == i2.finish +end + +------------------------------------------------------------------------------------------------------------------------ +-- Checks if the line from an interval list file is a header line +-- +-- Return values: +-- 1: true/false +------------------------------------------------------------------------------------------------------------------------ +local function isIntervalHeaderLine(l) + return l:sub(1,1) == "@" +end + + +------------------------------------------------------------------------------------------------------------------------ +-- Compares the start position of two intervals +-- +-- Return values: +-- 1: -1, 0 or 1 (respectively for a < b, a == b, a > b) +------------------------------------------------------------------------------------------------------------------------ +local function compIntervals(a, b) + + local function c(x,y) + if x < y then return -1 + elseif x > y then return 1 + else return 0 end + end + -- same chromosomes + if a.chr == b.chr then return c(a.start, b.start) + else + x = tonumber(a.chr) + y = tonumber(b.chr) + if x and y then return c(x,y) + else return c(a.chr, b.chr) end + end +end + +------------------------------------------------------------------------------------------------------------------------ +-- Compare function to sort a list of intervals (use with table.sort) +-- +-- Return values: +-- 1: true if a < b, false otherwise. +------------------------------------------------------------------------------------------------------------------------ +local function sortCompInterval(a, b) + if a.chr == b.chr then return a.start < b.start end + local x = tonumber(a.chr) + local y = tonumber(b.chr) + if x and y then + return x < y end + return a.chr < b.chr +end + +------------------------------------------------------------------------------------------------------------------------ +-- Checks if the interval is a valid human genome interval +-- +-- Return values: +-- 1: true/false +------------------------------------------------------------------------------------------------------------------------ +local function isValidInterval(interval) + local x + if interval.chr == "X" then x = 23 + elseif interval.chr == "Y" then x = 24 + elseif interval.chr == "MT" then x = 25 + else x = tonumber(interval.chr) end + return x >= 1 and x <= 25 and interval.start < interval.finish and chr_limits[x] > interval.finish +end + + +------------------------------------------------------------------------------------------------------------------------ +-- Checks if the intervals are overlapping. Intervals are said to overlap if one of the following is true: +-- i1i2: i1 starts before i2, but ends inside i2. +-- i2i1: i2 starts before i1, but ends inside i1. +-- i1_inside: i1 is fully contained inside i2. +-- i2_inside: i2 is fully contained inside i1. +-- +-- Return values: +-- 1: true/false +-- 2: if true, returns "i1i2", "i2i1", "i1_inside", "i2_inside" +------------------------------------------------------------------------------------------------------------------------ +local function isOverlappingInterval(i1, i2) + if i1.chr ~= i2.chr then return false + elseif i1.start < i2.start and i1.finish < i2.finish then return true, "i1i2" + elseif i2.start < i1.start and i2.finish < i1.finish then return true, "i2i1" + elseif i1.start > i2.start and i1.finish < i2.finish then return true, "i1_inside" + elseif i2.start > i1.start and i2.finish < i1.finish then return true, "i2_inside" + else return false end +end \ No newline at end of file diff --git a/lua/hasTheseTargets.lua b/lua/hasTheseTargets.lua index 912bb7cc4..3a6682de1 100644 --- a/lua/hasTheseTargets.lua +++ b/lua/hasTheseTargets.lua @@ -114,8 +114,6 @@ local function findInterval(i, intervals) local finish = table.getn(intervals) local current = math.floor((start + finish) / 2) --- print("findInterval: ", start, finish, current, intervals[current].c, intervals[current].s, intervals[current].e) - while start < finish and not intervalContainsInterval(i, intervals[current]) and not isInterceptingInterval(i, intervals[current]) do if compIntervals(i, intervals[current]) < 0 then finish = current - 1 @@ -123,7 +121,6 @@ local function findInterval(i, intervals) start = current + 1 end current = math.floor((start + finish) / 2) --- print("findInterval: ", start, finish, current, intervals[current].c, intervals[current].s, intervals[current].e) end return intervalContainsInterval(i, intervals[current]), current end @@ -138,7 +135,6 @@ for l in io.lines(targetSet) do if not isIntervalHeaderLine(l) then local c, s, e, st, info = parseIntervalLine(l) local intA = newInterval(c,s,e,st,info) --- print("Debug: ", c,s,e,info) local intervalExists, i = findInterval(intA, a) if intervalExists then print(a[i].c, a[i].s, a[i].e, st, info) diff --git a/lua/sortGenesByCoverage.lua b/lua/sortGenesByCoverage.lua index 4bbf2b364..6860976fe 100644 --- a/lua/sortGenesByCoverage.lua +++ b/lua/sortGenesByCoverage.lua @@ -8,7 +8,7 @@ -- Global script variables ------------------------------------------------------------------------------- local coverage = arg[1] -local genesOfInterest= arg[2] or "/Volumes/humgen/gsa-hpprojects/dev/carneiro/top_genes/data/disease.genes.annotated.interval_list" +local genesOfInterest= arg[2] local genes = {} local header = "" @@ -33,7 +33,6 @@ for l in io.lines(genesOfInterest) do if l:sub(1,1) == "@" then header = header .. l .."\n" else local c, s, e, info = l:match("(%w+)%s+(%d+)%s+(%d+)%s+%+%s+(.*)") --- print("Debug: adding gene: ",c,s,e,info) table.insert(genes, {c=c, s=tonumber(s), e=tonumber(e), info=info}) end end @@ -46,7 +45,6 @@ for l in io.lines(coverage) do s = tonumber(s) e = tonumber(e) while genes[i] ~= nil and (isSameGene(i, c, s, e) or isMergedGene(i, c, s, e)) do --- print("Debug: ", c, s, e, "--",genes[i].c, genes[i].s, genes[i].e) genes[i].totalCoverage = tonumber(totalCoverage) genes[i].avgCoverage = tonumber(avgCoverage) geneOk = true