678 lines
20 KiB
Lua
678 lines
20 KiB
Lua
|
|
--[[
|
||
|
|
The MIT License
|
||
|
|
|
||
|
|
Copyright (c) 2011, Attractive Chaos <attractor@live.co.uk>
|
||
|
|
|
||
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
|
|
of this software and associated documentation files (the "Software"), to deal
|
||
|
|
in the Software without restriction, including without limitation the rights
|
||
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||
|
|
copies of the Software, and to permit persons to whom the Software is
|
||
|
|
furnished to do so, subject to the following conditions:
|
||
|
|
|
||
|
|
The above copyright notice and this permission notice shall be included in
|
||
|
|
all copies or substantial portions of the Software.
|
||
|
|
|
||
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||
|
|
SOFTWARE.
|
||
|
|
]]--
|
||
|
|
|
||
|
|
--[[
|
||
|
|
This is a Lua library, more exactly a collection of Lua snippets, covering
|
||
|
|
utilities (e.g. getopt), string operations (e.g. split), statistics (e.g.
|
||
|
|
Fisher's exact test), special functions (e.g. logarithm gamma) and matrix
|
||
|
|
operations (e.g. Gauss-Jordan elimination). The routines are designed to be
|
||
|
|
as independent as possible, such that one can copy-paste relevant pieces of
|
||
|
|
code without worrying about additional library dependencies.
|
||
|
|
|
||
|
|
If you use routines from this library, please include the licensing
|
||
|
|
information above where appropriate.
|
||
|
|
]]--
|
||
|
|
|
||
|
|
--[[
|
||
|
|
Library functions and dependencies. "a>b" means "a is required by b"; "b<a"
|
||
|
|
means "b depends on a".
|
||
|
|
|
||
|
|
os.getopt()
|
||
|
|
string:split()
|
||
|
|
io.xopen()
|
||
|
|
table.ksmall()
|
||
|
|
table.shuffle()
|
||
|
|
math.lgamma() >math.lbinom() >math.igamma()
|
||
|
|
math.igamma() <math.lgamma() >matrix.chi2()
|
||
|
|
math.erfc()
|
||
|
|
math.lbinom() <math.lgamma() >math.fisher_exact()
|
||
|
|
math.bernstein_poly() <math.lbinom()
|
||
|
|
math.fisher_exact() <math.lbinom()
|
||
|
|
math.jackknife()
|
||
|
|
math.pearson()
|
||
|
|
math.spearman()
|
||
|
|
math.fmin()
|
||
|
|
matrix
|
||
|
|
matrix.add()
|
||
|
|
matrix.T() >matrix.mul()
|
||
|
|
matrix.mul() <matrix.T()
|
||
|
|
matrix.tostring()
|
||
|
|
matrix.chi2() <math.igamma()
|
||
|
|
matrix.solve()
|
||
|
|
]]--
|
||
|
|
|
||
|
|
-- Description: getopt() translated from the BSD getopt(); compatible with the default Unix getopt()
|
||
|
|
--[[ Example:
|
||
|
|
for o, a in os.getopt(arg, 'a:b') do
|
||
|
|
print(o, a)
|
||
|
|
end
|
||
|
|
]]--
|
||
|
|
function os.getopt(args, ostr)
|
||
|
|
local arg, place = nil, 0;
|
||
|
|
return function ()
|
||
|
|
if place == 0 then -- update scanning pointer
|
||
|
|
place = 1
|
||
|
|
if #args == 0 or args[1]:sub(1, 1) ~= '-' then place = 0; return nil end
|
||
|
|
if #args[1] >= 2 then
|
||
|
|
place = place + 1
|
||
|
|
if args[1]:sub(2, 2) == '-' then -- found "--"
|
||
|
|
place = 0
|
||
|
|
table.remove(args, 1);
|
||
|
|
return nil;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
local optopt = args[1]:sub(place, place);
|
||
|
|
place = place + 1;
|
||
|
|
local oli = ostr:find(optopt);
|
||
|
|
if optopt == ':' or oli == nil then -- unknown option
|
||
|
|
if optopt == '-' then return nil end
|
||
|
|
if place > #args[1] then
|
||
|
|
table.remove(args, 1);
|
||
|
|
place = 0;
|
||
|
|
end
|
||
|
|
return '?';
|
||
|
|
end
|
||
|
|
oli = oli + 1;
|
||
|
|
if ostr:sub(oli, oli) ~= ':' then -- do not need argument
|
||
|
|
arg = nil;
|
||
|
|
if place > #args[1] then
|
||
|
|
table.remove(args, 1);
|
||
|
|
place = 0;
|
||
|
|
end
|
||
|
|
else -- need an argument
|
||
|
|
if place <= #args[1] then -- no white space
|
||
|
|
arg = args[1]:sub(place);
|
||
|
|
else
|
||
|
|
table.remove(args, 1);
|
||
|
|
if #args == 0 then -- an option requiring argument is the last one
|
||
|
|
place = 0;
|
||
|
|
if ostr:sub(1, 1) == ':' then return ':' end
|
||
|
|
return '?';
|
||
|
|
else arg = args[1] end
|
||
|
|
end
|
||
|
|
table.remove(args, 1);
|
||
|
|
place = 0;
|
||
|
|
end
|
||
|
|
return optopt, arg;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: string split
|
||
|
|
function string:split(sep, n)
|
||
|
|
local a, start = {}, 1;
|
||
|
|
sep = sep or "%s+";
|
||
|
|
repeat
|
||
|
|
local b, e = self:find(sep, start);
|
||
|
|
if b == nil then
|
||
|
|
table.insert(a, self:sub(start));
|
||
|
|
break
|
||
|
|
end
|
||
|
|
a[#a+1] = self:sub(start, b - 1);
|
||
|
|
start = e + 1;
|
||
|
|
if n and #a == n then
|
||
|
|
table.insert(a, self:sub(start));
|
||
|
|
break
|
||
|
|
end
|
||
|
|
until start > #self;
|
||
|
|
return a;
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: smart file open
|
||
|
|
function io.xopen(fn, mode)
|
||
|
|
mode = mode or 'r';
|
||
|
|
if fn == nil then return io.stdin;
|
||
|
|
elseif fn == '-' then return (mode == 'r' and io.stdin) or io.stdout;
|
||
|
|
elseif fn:sub(-3) == '.gz' then return (mode == 'r' and io.popen('gzip -dc ' .. fn, 'r')) or io.popen('gzip > ' .. fn, 'w');
|
||
|
|
elseif fn:sub(-4) == '.bz2' then return (mode == 'r' and io.popen('bzip2 -dc ' .. fn, 'r')) or io.popen('bgzip2 > ' .. fn, 'w');
|
||
|
|
else return io.open(fn, mode) end
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: find the k-th smallest element in an array (Ref. http://ndevilla.free.fr/median/)
|
||
|
|
function table.ksmall(arr, k)
|
||
|
|
local low, high = 1, #arr;
|
||
|
|
while true do
|
||
|
|
if high <= low then return arr[k] end
|
||
|
|
if high == low + 1 then
|
||
|
|
if arr[high] < arr[low] then arr[high], arr[low] = arr[low], arr[high] end;
|
||
|
|
return arr[k];
|
||
|
|
end
|
||
|
|
local mid = math.floor((high + low) / 2);
|
||
|
|
if arr[high] < arr[mid] then arr[mid], arr[high] = arr[high], arr[mid] end
|
||
|
|
if arr[high] < arr[low] then arr[low], arr[high] = arr[high], arr[low] end
|
||
|
|
if arr[low] < arr[mid] then arr[low], arr[mid] = arr[mid], arr[low] end
|
||
|
|
arr[mid], arr[low+1] = arr[low+1], arr[mid];
|
||
|
|
local ll, hh = low + 1, high;
|
||
|
|
while true do
|
||
|
|
repeat ll = ll + 1 until arr[ll] >= arr[low]
|
||
|
|
repeat hh = hh - 1 until arr[low] >= arr[hh]
|
||
|
|
if hh < ll then break end
|
||
|
|
arr[ll], arr[hh] = arr[hh], arr[ll];
|
||
|
|
end
|
||
|
|
arr[low], arr[hh] = arr[hh], arr[low];
|
||
|
|
if hh <= k then low = ll end
|
||
|
|
if hh >= k then high = hh - 1 end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: shuffle/permutate an array
|
||
|
|
function table.shuffle(a)
|
||
|
|
for i = #a, 1, -1 do
|
||
|
|
local j = math.random(i)
|
||
|
|
a[j], a[i] = a[i], a[j]
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
--
|
||
|
|
-- Mathematics
|
||
|
|
--
|
||
|
|
|
||
|
|
-- Description: log gamma function
|
||
|
|
-- Required by: math.lbinom()
|
||
|
|
-- Reference: AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
|
||
|
|
function math.lgamma(z)
|
||
|
|
local x;
|
||
|
|
x = 0.1659470187408462e-06 / (z+7);
|
||
|
|
x = x + 0.9934937113930748e-05 / (z+6);
|
||
|
|
x = x - 0.1385710331296526 / (z+5);
|
||
|
|
x = x + 12.50734324009056 / (z+4);
|
||
|
|
x = x - 176.6150291498386 / (z+3);
|
||
|
|
x = x + 771.3234287757674 / (z+2);
|
||
|
|
x = x - 1259.139216722289 / (z+1);
|
||
|
|
x = x + 676.5203681218835 / z;
|
||
|
|
x = x + 0.9999999999995183;
|
||
|
|
return math.log(x) - 5.58106146679532777 - z + (z-0.5) * math.log(z+6.5);
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: regularized incomplete gamma function
|
||
|
|
-- Dependent on: math.lgamma()
|
||
|
|
--[[
|
||
|
|
Formulas are taken from Wiki, with additional input from Numerical
|
||
|
|
Recipes in C (for modified Lentz's algorithm) and AS245
|
||
|
|
(http://lib.stat.cmu.edu/apstat/245).
|
||
|
|
|
||
|
|
A good online calculator is available at:
|
||
|
|
|
||
|
|
http://www.danielsoper.com/statcalc/calc23.aspx
|
||
|
|
|
||
|
|
It calculates upper incomplete gamma function, which equals
|
||
|
|
math.igamma(s,z,true)*math.exp(math.lgamma(s))
|
||
|
|
]]--
|
||
|
|
function math.igamma(s, z, complement)
|
||
|
|
|
||
|
|
local function _kf_gammap(s, z)
|
||
|
|
local sum, x = 1, 1;
|
||
|
|
for k = 1, 100 do
|
||
|
|
x = x * z / (s + k);
|
||
|
|
sum = sum + x;
|
||
|
|
if x / sum < 1e-14 then break end
|
||
|
|
end
|
||
|
|
return math.exp(s * math.log(z) - z - math.lgamma(s + 1.) + math.log(sum));
|
||
|
|
end
|
||
|
|
|
||
|
|
local function _kf_gammaq(s, z)
|
||
|
|
local C, D, f, TINY;
|
||
|
|
f = 1. + z - s; C = f; D = 0.; TINY = 1e-290;
|
||
|
|
-- Modified Lentz's algorithm for computing continued fraction. See Numerical Recipes in C, 2nd edition, section 5.2
|
||
|
|
for j = 1, 100 do
|
||
|
|
local d;
|
||
|
|
local a, b = j * (s - j), j*2 + 1 + z - s;
|
||
|
|
D = b + a * D;
|
||
|
|
if D < TINY then D = TINY end
|
||
|
|
C = b + a / C;
|
||
|
|
if C < TINY then C = TINY end
|
||
|
|
D = 1. / D;
|
||
|
|
d = C * D;
|
||
|
|
f = f * d;
|
||
|
|
if math.abs(d - 1) < 1e-14 then break end
|
||
|
|
end
|
||
|
|
return math.exp(s * math.log(z) - z - math.lgamma(s) - math.log(f));
|
||
|
|
end
|
||
|
|
|
||
|
|
if complement then
|
||
|
|
return ((z <= 1 or z < s) and 1 - _kf_gammap(s, z)) or _kf_gammaq(s, z);
|
||
|
|
else
|
||
|
|
return ((z <= 1 or z < s) and _kf_gammap(s, z)) or (1 - _kf_gammaq(s, z));
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
math.M_SQRT2 = 1.41421356237309504880 -- sqrt(2)
|
||
|
|
math.M_SQRT1_2 = 0.70710678118654752440 -- 1/sqrt(2)
|
||
|
|
|
||
|
|
-- Description: complement error function erfc(x): \Phi(x) = 0.5 * erfc(-x/M_SQRT2)
|
||
|
|
function math.erfc(x)
|
||
|
|
local z = math.abs(x) * math.M_SQRT2
|
||
|
|
if z > 37 then return (x > 0 and 0) or 2 end
|
||
|
|
local expntl = math.exp(-0.5 * z * z)
|
||
|
|
local p
|
||
|
|
if z < 10. / math.M_SQRT2 then -- for small z
|
||
|
|
p = expntl * ((((((.03526249659989109 * z + .7003830644436881) * z + 6.37396220353165) * z + 33.912866078383)
|
||
|
|
* z + 112.0792914978709) * z + 221.2135961699311) * z + 220.2068679123761)
|
||
|
|
/ (((((((.08838834764831844 * z + 1.755667163182642) * z + 16.06417757920695) * z + 86.78073220294608)
|
||
|
|
* z + 296.5642487796737) * z + 637.3336333788311) * z + 793.8265125199484) * z + 440.4137358247522);
|
||
|
|
else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65))))) end
|
||
|
|
return (x > 0 and 2 * p) or 2 * (1 - p)
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: log binomial coefficient
|
||
|
|
-- Dependent on: math.lgamma()
|
||
|
|
-- Required by: math.fisher_exact()
|
||
|
|
function math.lbinom(n, m)
|
||
|
|
if m == nil then
|
||
|
|
local a = {};
|
||
|
|
a[0], a[n] = 0, 0;
|
||
|
|
local t = math.lgamma(n+1);
|
||
|
|
for m = 1, n-1 do a[m] = t - math.lgamma(m+1) - math.lgamma(n-m+1) end
|
||
|
|
return a;
|
||
|
|
else return math.lgamma(n+1) - math.lgamma(m+1) - math.lgamma(n-m+1) end
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: Berstein polynomials (mainly for Bezier curves)
|
||
|
|
-- Dependent on: math.lbinom()
|
||
|
|
-- Note: to compute derivative: let beta_new[i]=beta[i+1]-beta[i]
|
||
|
|
function math.bernstein_poly(beta)
|
||
|
|
local n = #beta - 1;
|
||
|
|
local lbc = math.lbinom(n); -- log binomial coefficients
|
||
|
|
return function (t)
|
||
|
|
assert(t >= 0 and t <= 1);
|
||
|
|
if t == 0 then return beta[1] end
|
||
|
|
if t == 1 then return beta[n+1] end
|
||
|
|
local sum, logt, logt1 = 0, math.log(t), math.log(1-t);
|
||
|
|
for i = 0, n do sum = sum + beta[i+1] * math.exp(lbc[i] + i * logt + (n-i) * logt1) end
|
||
|
|
return sum;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: Fisher's exact test
|
||
|
|
-- Dependent on: math.lbinom()
|
||
|
|
-- Return: left-, right- and two-tail P-values
|
||
|
|
--[[
|
||
|
|
Fisher's exact test for 2x2 congintency tables:
|
||
|
|
|
||
|
|
n11 n12 | n1_
|
||
|
|
n21 n22 | n2_
|
||
|
|
-----------+----
|
||
|
|
n_1 n_2 | n
|
||
|
|
|
||
|
|
Reference: http://www.langsrud.com/fisher.htm
|
||
|
|
]]--
|
||
|
|
function math.fisher_exact(n11, n12, n21, n22)
|
||
|
|
local aux; -- keep the states of n* for acceleration
|
||
|
|
|
||
|
|
-- Description: hypergeometric function
|
||
|
|
local function hypergeo(n11, n1_, n_1, n)
|
||
|
|
return math.exp(math.lbinom(n1_, n11) + math.lbinom(n-n1_, n_1-n11) - math.lbinom(n, n_1));
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: incremental hypergeometric function
|
||
|
|
-- Note: aux = {n11, n1_, n_1, n, p}
|
||
|
|
local function hypergeo_inc(n11, n1_, n_1, n)
|
||
|
|
if n1_ ~= 0 or n_1 ~= 0 or n ~= 0 then
|
||
|
|
aux = {n11, n1_, n_1, n, 1};
|
||
|
|
else -- then only n11 is changed
|
||
|
|
local mod;
|
||
|
|
_, mod = math.modf(n11 / 11);
|
||
|
|
if mod ~= 0 and n11 + aux[4] - aux[2] - aux[3] ~= 0 then
|
||
|
|
if n11 == aux[1] + 1 then -- increase by 1
|
||
|
|
aux[5] = aux[5] * (aux[2] - aux[1]) / n11 * (aux[3] - aux[1]) / (n11 + aux[4] - aux[2] - aux[3]);
|
||
|
|
aux[1] = n11;
|
||
|
|
return aux[5];
|
||
|
|
end
|
||
|
|
if n11 == aux[1] - 1 then -- descrease by 1
|
||
|
|
aux[5] = aux[5] * aux[1] / (aux[2] - n11) * (aux[1] + aux[4] - aux[2] - aux[3]) / (aux[3] - n11);
|
||
|
|
aux[1] = n11;
|
||
|
|
return aux[5];
|
||
|
|
end
|
||
|
|
end
|
||
|
|
aux[1] = n11;
|
||
|
|
end
|
||
|
|
aux[5] = hypergeo(aux[1], aux[2], aux[3], aux[4]);
|
||
|
|
return aux[5];
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: computing the P-value by Fisher's exact test
|
||
|
|
local max, min, left, right, n1_, n_1, n, two, p, q, i, j;
|
||
|
|
n1_, n_1, n = n11 + n12, n11 + n21, n11 + n12 + n21 + n22;
|
||
|
|
max = (n_1 < n1_ and n_1) or n1_; -- max n11, for the right tail
|
||
|
|
min = n1_ + n_1 - n;
|
||
|
|
if min < 0 then min = 0 end -- min n11, for the left tail
|
||
|
|
two, left, right = 1, 1, 1;
|
||
|
|
if min == max then return 1 end -- no need to do test
|
||
|
|
q = hypergeo_inc(n11, n1_, n_1, n); -- the probability of the current table
|
||
|
|
-- left tail
|
||
|
|
i, left, p = min + 1, 0, hypergeo_inc(min, 0, 0, 0);
|
||
|
|
while p < 0.99999999 * q do
|
||
|
|
left, p, i = left + p, hypergeo_inc(i, 0, 0, 0), i + 1;
|
||
|
|
end
|
||
|
|
i = i - 1;
|
||
|
|
if p < 1.00000001 * q then left = left + p;
|
||
|
|
else i = i - 1 end
|
||
|
|
-- right tail
|
||
|
|
j, right, p = max - 1, 0, hypergeo_inc(max, 0, 0, 0);
|
||
|
|
while p < 0.99999999 * q do
|
||
|
|
right, p, j = right + p, hypergeo_inc(j, 0, 0, 0), j - 1;
|
||
|
|
end
|
||
|
|
j = j + 1;
|
||
|
|
if p < 1.00000001 * q then right = right + p;
|
||
|
|
else j = j + 1 end
|
||
|
|
-- two-tail
|
||
|
|
two = left + right;
|
||
|
|
if two > 1 then two = 1 end
|
||
|
|
-- adjust left and right
|
||
|
|
if math.abs(i - n11) < math.abs(j - n11) then right = 1 - left + q;
|
||
|
|
else left = 1 - right + q end
|
||
|
|
return left, right, two;
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: Delete-m Jackknife
|
||
|
|
--[[
|
||
|
|
Given g groups of values with a statistics estimated from m[i] samples in
|
||
|
|
i-th group being t[i], compute the mean and the variance. t0 below is the
|
||
|
|
estimate from all samples. Reference:
|
||
|
|
|
||
|
|
Busing et al. (1999) Delete-m Jackknife for unequal m. Statistics and Computing, 9:3-8.
|
||
|
|
]]--
|
||
|
|
function math.jackknife(g, m, t, t0)
|
||
|
|
local h, n, sum = {}, 0, 0;
|
||
|
|
for j = 1, g do n = n + m[j] end
|
||
|
|
if t0 == nil then -- When t0 is absent, estimate it in a naive way
|
||
|
|
t0 = 0;
|
||
|
|
for j = 1, g do t0 = t0 + m[j] * t[j] end
|
||
|
|
t0 = t0 / n;
|
||
|
|
end
|
||
|
|
local mean, var = 0, 0;
|
||
|
|
for j = 1, g do
|
||
|
|
h[j] = n / m[j];
|
||
|
|
mean = mean + (1 - m[j] / n) * t[j];
|
||
|
|
end
|
||
|
|
mean = g * t0 - mean; -- Eq. (8)
|
||
|
|
for j = 1, g do
|
||
|
|
local x = h[j] * t0 - (h[j] - 1) * t[j] - mean;
|
||
|
|
var = var + 1 / (h[j] - 1) * x * x;
|
||
|
|
end
|
||
|
|
var = var / g;
|
||
|
|
return mean, var;
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: Pearson correlation coefficient
|
||
|
|
-- Input: a is an n*2 table
|
||
|
|
function math.pearson(a)
|
||
|
|
-- compute the mean
|
||
|
|
local x1, y1 = 0, 0
|
||
|
|
for _, v in pairs(a) do
|
||
|
|
x1, y1 = x1 + v[1], y1 + v[2]
|
||
|
|
end
|
||
|
|
-- compute the coefficient
|
||
|
|
x1, y1 = x1 / #a, y1 / #a
|
||
|
|
local x2, y2, xy = 0, 0, 0
|
||
|
|
for _, v in pairs(a) do
|
||
|
|
local tx, ty = v[1] - x1, v[2] - y1
|
||
|
|
xy, x2, y2 = xy + tx * ty, x2 + tx * tx, y2 + ty * ty
|
||
|
|
end
|
||
|
|
return xy / math.sqrt(x2) / math.sqrt(y2)
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: Spearman correlation coefficient
|
||
|
|
function math.spearman(a)
|
||
|
|
local function aux_func(t) -- auxiliary function
|
||
|
|
return (t == 1 and 0) or (t*t - 1) * t / 12
|
||
|
|
end
|
||
|
|
|
||
|
|
for _, v in pairs(a) do v.r = {} end
|
||
|
|
local T, S = {}, {}
|
||
|
|
-- compute the rank
|
||
|
|
for k = 1, 2 do
|
||
|
|
table.sort(a, function(u,v) return u[k]<v[k] end)
|
||
|
|
local same = 1
|
||
|
|
T[k] = 0
|
||
|
|
for i = 2, #a + 1 do
|
||
|
|
if i <= #a and a[i-1][k] == a[i][k] then same = same + 1
|
||
|
|
else
|
||
|
|
local rank = (i-1) * 2 - same + 1
|
||
|
|
for j = i - same, i - 1 do a[j].r[k] = rank end
|
||
|
|
if same > 1 then T[k], same = T[k] + aux_func(same), 1 end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
S[k] = aux_func(#a) - T[k]
|
||
|
|
end
|
||
|
|
-- compute the coefficient
|
||
|
|
local sum = 0
|
||
|
|
for _, v in pairs(a) do -- TODO: use nested loops to reduce loss of precision
|
||
|
|
local t = (v.r[1] - v.r[2]) / 2
|
||
|
|
sum = sum + t * t
|
||
|
|
end
|
||
|
|
return (S[1] + S[2] - sum) / 2 / math.sqrt(S[1] * S[2])
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: Hooke-Jeeves derivative-free optimization
|
||
|
|
function math.fmin(func, x, data, r, eps, max_calls)
|
||
|
|
local n, n_calls = #x, 0;
|
||
|
|
r = r or 0.5;
|
||
|
|
eps = eps or 1e-7;
|
||
|
|
max_calls = max_calls or 50000
|
||
|
|
|
||
|
|
function fmin_aux(x1, data, fx1, dx) -- auxiliary function
|
||
|
|
local ftmp;
|
||
|
|
for k = 1, n do
|
||
|
|
x1[k] = x1[k] + dx[k];
|
||
|
|
local ftmp = func(x1, data); n_calls = n_calls + 1;
|
||
|
|
if ftmp < fx1 then fx1 = ftmp;
|
||
|
|
else -- search the opposite direction
|
||
|
|
dx[k] = -dx[k];
|
||
|
|
x1[k] = x1[k] + dx[k] + dx[k];
|
||
|
|
ftmp = func(x1, data); n_calls = n_calls + 1;
|
||
|
|
if ftmp < fx1 then fx1 = ftmp
|
||
|
|
else x1[k] = x1[k] - dx[k] end -- back to the original x[k]
|
||
|
|
end
|
||
|
|
end
|
||
|
|
return fx1; -- here: fx1=f(n,x1)
|
||
|
|
end
|
||
|
|
|
||
|
|
local dx, x1 = {}, {};
|
||
|
|
for k = 1, n do -- initial directions, based on MGJ
|
||
|
|
dx[k] = math.abs(x[k]) * r;
|
||
|
|
if dx[k] == 0 then dx[k] = r end;
|
||
|
|
end
|
||
|
|
local radius = r;
|
||
|
|
local fx1, fx;
|
||
|
|
fx = func(x, data); fx1 = fx; n_calls = n_calls + 1;
|
||
|
|
while true do
|
||
|
|
for i = 1, n do x1[i] = x[i] end; -- x1 = x
|
||
|
|
fx1 = fmin_aux(x1, data, fx, dx);
|
||
|
|
while fx1 < fx do
|
||
|
|
for k = 1, n do
|
||
|
|
local t = x[k];
|
||
|
|
dx[k] = (x1[k] > x[k] and math.abs(dx[k])) or -math.abs(dx[k]);
|
||
|
|
x[k] = x1[k];
|
||
|
|
x1[k] = x1[k] + x1[k] - t;
|
||
|
|
end
|
||
|
|
fx = fx1;
|
||
|
|
if n_calls >= max_calls then break end
|
||
|
|
fx1 = func(x1, data); n_calls = n_calls + 1;
|
||
|
|
fx1 = fmin_aux(x1, data, fx1, dx);
|
||
|
|
if fx1 >= fx then break end
|
||
|
|
local kk = n;
|
||
|
|
for k = 1, n do
|
||
|
|
if math.abs(x1[k] - x[k]) > .5 * math.abs(dx[k]) then
|
||
|
|
kk = k;
|
||
|
|
break;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
if kk == n then break end
|
||
|
|
end
|
||
|
|
if radius >= eps then
|
||
|
|
if n_calls >= max_calls then break end
|
||
|
|
radius = radius * r;
|
||
|
|
for k = 1, n do dx[k] = dx[k] * r end
|
||
|
|
else break end
|
||
|
|
end
|
||
|
|
return fx1, n_calls;
|
||
|
|
end
|
||
|
|
|
||
|
|
--
|
||
|
|
-- Matrix
|
||
|
|
--
|
||
|
|
|
||
|
|
matrix = {}
|
||
|
|
|
||
|
|
-- Description: matrix transpose
|
||
|
|
-- Required by: matrix.mul()
|
||
|
|
function matrix.T(a)
|
||
|
|
local m, n, x = #a, #a[1], {};
|
||
|
|
for i = 1, n do
|
||
|
|
x[i] = {};
|
||
|
|
for j = 1, m do x[i][j] = a[j][i] end
|
||
|
|
end
|
||
|
|
return x;
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: matrix add
|
||
|
|
function matrix.add(a, b)
|
||
|
|
assert(#a == #b and #a[1] == #b[1]);
|
||
|
|
local m, n, x = #a, #a[1], {};
|
||
|
|
for i = 1, m do
|
||
|
|
x[i] = {};
|
||
|
|
local ai, bi, xi = a[i], b[i], x[i];
|
||
|
|
for j = 1, n do xi[j] = ai[j] + bi[j] end
|
||
|
|
end
|
||
|
|
return x;
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: matrix mul
|
||
|
|
-- Dependent on: matrix.T()
|
||
|
|
-- Note: much slower without transpose
|
||
|
|
function matrix.mul(a, b)
|
||
|
|
assert(#a[1] == #b);
|
||
|
|
local m, n, p, x = #a, #a[1], #b[1], {};
|
||
|
|
local c = matrix.T(b); -- transpose for efficiency
|
||
|
|
for i = 1, m do
|
||
|
|
x[i] = {}
|
||
|
|
local xi = x[i];
|
||
|
|
for j = 1, p do
|
||
|
|
local sum, ai, cj = 0, a[i], c[j];
|
||
|
|
for k = 1, n do sum = sum + ai[k] * cj[k] end
|
||
|
|
xi[j] = sum;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
return x;
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: matrix print
|
||
|
|
function matrix.tostring(a)
|
||
|
|
local z = {};
|
||
|
|
for i = 1, #a do
|
||
|
|
z[i] = table.concat(a[i], "\t");
|
||
|
|
end
|
||
|
|
return table.concat(z, "\n");
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: chi^2 test for contingency tables
|
||
|
|
-- Dependent on: math.igamma()
|
||
|
|
function matrix.chi2(a)
|
||
|
|
if #a == 2 and #a[1] == 2 then -- 2x2 table
|
||
|
|
local x, z
|
||
|
|
x = (a[1][1] + a[1][2]) * (a[2][1] + a[2][2]) * (a[1][1] + a[2][1]) * (a[1][2] + a[2][2])
|
||
|
|
if x == 0 then return 0, 1, false end
|
||
|
|
z = a[1][1] * a[2][2] - a[1][2] * a[2][1]
|
||
|
|
z = (a[1][1] + a[1][2] + a[2][1] + a[2][2]) * z * z / x
|
||
|
|
return z, math.igamma(.5, .5 * z, true), true
|
||
|
|
else -- generic table
|
||
|
|
local rs, cs, n, m, N, z = {}, {}, #a, #a[1], 0, 0
|
||
|
|
for i = 1, n do rs[i] = 0 end
|
||
|
|
for j = 1, m do cs[j] = 0 end
|
||
|
|
for i = 1, n do -- compute column sum and row sum
|
||
|
|
for j = 1, m do cs[j], rs[i] = cs[j] + a[i][j], rs[i] + a[i][j] end
|
||
|
|
end
|
||
|
|
for i = 1, n do N = N + rs[i] end
|
||
|
|
for i = 1, n do -- compute the chi^2 statistics
|
||
|
|
for j = 1, m do
|
||
|
|
local E = rs[i] * cs[j] / N;
|
||
|
|
z = z + (a[i][j] - E) * (a[i][j] - E) / E
|
||
|
|
end
|
||
|
|
end
|
||
|
|
return z, math.igamma(.5 * (n-1) * (m-1), .5 * z, true), true;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
-- Description: Gauss-Jordan elimination (solving equations; computing inverse)
|
||
|
|
-- Note: on return, a[n][n] is the inverse; b[n][m] is the solution
|
||
|
|
-- Reference: Section 2.1, Numerical Recipes in C, 2nd edition
|
||
|
|
function matrix.solve(a, b)
|
||
|
|
assert(#a == #a[1]);
|
||
|
|
local n, m = #a, (b and #b[1]) or 0;
|
||
|
|
local xc, xr, ipiv = {}, {}, {};
|
||
|
|
local ic, ir;
|
||
|
|
|
||
|
|
for j = 1, n do ipiv[j] = 0 end
|
||
|
|
for i = 1, n do
|
||
|
|
local big = 0;
|
||
|
|
for j = 1, n do
|
||
|
|
local aj = a[j];
|
||
|
|
if ipiv[j] ~= 1 then
|
||
|
|
for k = 1, n do
|
||
|
|
if ipiv[k] == 0 then
|
||
|
|
if math.abs(aj[k]) >= big then
|
||
|
|
big = math.abs(aj[k]);
|
||
|
|
ir, ic = j, k;
|
||
|
|
end
|
||
|
|
elseif ipiv[k] > 1 then return -2 end -- singular matrix
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
ipiv[ic] = ipiv[ic] + 1;
|
||
|
|
if ir ~= ic then
|
||
|
|
for l = 1, n do a[ir][l], a[ic][l] = a[ic][l], a[ir][l] end
|
||
|
|
if b then
|
||
|
|
for l = 1, m do b[ir][l], b[ic][l] = b[ic][l], b[ir][l] end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
xr[i], xc[i] = ir, ic;
|
||
|
|
if a[ic][ic] == 0 then return -3 end -- singular matrix
|
||
|
|
local pivinv = 1 / a[ic][ic];
|
||
|
|
a[ic][ic] = 1;
|
||
|
|
for l = 1, n do a[ic][l] = a[ic][l] * pivinv end
|
||
|
|
if b then
|
||
|
|
for l = 1, n do b[ic][l] = b[ic][l] * pivinv end
|
||
|
|
end
|
||
|
|
for ll = 1, n do
|
||
|
|
if ll ~= ic then
|
||
|
|
local tmp = a[ll][ic];
|
||
|
|
a[ll][ic] = 0;
|
||
|
|
local all, aic = a[ll], a[ic];
|
||
|
|
for l = 1, n do all[l] = all[l] - aic[l] * tmp end
|
||
|
|
if b then
|
||
|
|
local bll, bic = b[ll], b[ic];
|
||
|
|
for l = 1, m do bll[l] = bll[l] - bic[l] * tmp end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
for l = n, 1, -1 do
|
||
|
|
if xr[l] ~= xc[l] then
|
||
|
|
for k = 1, n do a[k][xr[l]], a[k][xc[l]] = a[k][xc[l]], a[k][xr[l]] end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
return 0;
|
||
|
|
end
|