r885: compute dup with asmstat
This commit is contained in:
parent
6596c63dcd
commit
cdaf46665a
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env k8
|
#!/usr/bin/env k8
|
||||||
|
|
||||||
var paftools_version = '2.14-r883';
|
var paftools_version = '2.14-r885-dirty';
|
||||||
|
|
||||||
/*****************************
|
/*****************************
|
||||||
***** Library functions *****
|
***** Library functions *****
|
||||||
|
|
@ -564,16 +564,18 @@ function paf_call(args)
|
||||||
|
|
||||||
function paf_asmstat(args)
|
function paf_asmstat(args)
|
||||||
{
|
{
|
||||||
var c, min_seg_len = 10000, max_diff = 0.01, bp_flank_len = 0, bp_gap_len = 0;
|
var c, min_query_len = 0, min_seg_len = 10000, max_diff = 0.01, bp_flank_len = 0, bp_gap_len = 0;
|
||||||
while ((c = getopt(args, "l:d:b:g:")) != null) {
|
while ((c = getopt(args, "l:d:b:g:q:")) != null) {
|
||||||
if (c == 'l') min_seg_len = parseInt(getopt.arg);
|
if (c == 'l') min_seg_len = parseInt(getopt.arg);
|
||||||
else if (c == 'd') max_diff = parseFloat(getopt.arg);
|
else if (c == 'd') max_diff = parseFloat(getopt.arg);
|
||||||
else if (c == 'b') bp_flank_len = parseInt(getopt.arg);
|
else if (c == 'b') bp_flank_len = parseInt(getopt.arg);
|
||||||
else if (c == 'g') bp_gap_len = parseInt(getopt.arg);
|
else if (c == 'g') bp_gap_len = parseInt(getopt.arg);
|
||||||
|
else if (c == 'q') min_query_len = parseInt(getopt.arg);
|
||||||
}
|
}
|
||||||
if (getopt.ind == args.length) {
|
if (getopt.ind == args.length) {
|
||||||
print("Usage: paftools.js asmstat [options] <ref.fa.fai> <asm1.paf> [...]");
|
print("Usage: paftools.js asmstat [options] <ref.fa.fai> <asm1.paf> [...]");
|
||||||
print("Options:");
|
print("Options:");
|
||||||
|
print(" -q INT ignore query shorter than INT [0]");
|
||||||
print(" -l INT min alignment block length [" + min_seg_len + "]");
|
print(" -l INT min alignment block length [" + min_seg_len + "]");
|
||||||
print(" -d FLOAT max gap-compressed sequence divergence [" + max_diff + "]");
|
print(" -d FLOAT max gap-compressed sequence divergence [" + max_diff + "]");
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|
@ -657,7 +659,7 @@ function paf_asmstat(args)
|
||||||
return (NM - n_gaps + n_gapo) / (n_M + n_gapo);
|
return (NM - n_gaps + n_gapo) / (n_M + n_gapo);
|
||||||
}
|
}
|
||||||
|
|
||||||
var labels = ['Length', 'NG50', 'Coverage', 'Qcov', 'NGA50', '#breaks', 'bp(' + min_seg_len + ',0)', 'bp(' + min_seg_len + ',10k)'];
|
var labels = ['Length', 'l_cov', 'Rcov', 'Rdup', 'Qcov', 'NG75', 'NG50', 'NGA50', '#breaks', 'bp(' + min_seg_len + ',0)', 'bp(' + min_seg_len + ',10k)'];
|
||||||
var rst = [];
|
var rst = [];
|
||||||
for (var i = 0; i < labels.length; ++i)
|
for (var i = 0; i < labels.length; ++i)
|
||||||
rst[i] = [];
|
rst[i] = [];
|
||||||
|
|
@ -677,6 +679,7 @@ function paf_asmstat(args)
|
||||||
var m, line = buf.toString();
|
var m, line = buf.toString();
|
||||||
var t = line.split("\t");
|
var t = line.split("\t");
|
||||||
t[1] = parseInt(t[1]);
|
t[1] = parseInt(t[1]);
|
||||||
|
if (t[1] < min_query_len) continue;
|
||||||
if (t.length >= 2) {
|
if (t.length >= 2) {
|
||||||
query[t[0]] = t[1];
|
query[t[0]] = t[1];
|
||||||
if (qinfo[t[0]] == null) qinfo[t[0]] = {};
|
if (qinfo[t[0]] == null) qinfo[t[0]] = {};
|
||||||
|
|
@ -717,7 +720,8 @@ function paf_asmstat(args)
|
||||||
asm_lens.push(query[ctg]);
|
asm_lens.push(query[ctg]);
|
||||||
}
|
}
|
||||||
rst[0][i] = asm_len;
|
rst[0][i] = asm_len;
|
||||||
rst[1][i] = N50(asm_lens, ref_len, 0.5);
|
rst[5][i] = N50(asm_lens, ref_len, 0.75);
|
||||||
|
rst[6][i] = N50(asm_lens, ref_len, 0.5);
|
||||||
|
|
||||||
// compute coverage
|
// compute coverage
|
||||||
var l_cov = 0;
|
var l_cov = 0;
|
||||||
|
|
@ -732,18 +736,42 @@ function paf_asmstat(args)
|
||||||
} else en = en > ref_blocks[j][2]? en : ref_blocks[j][2];
|
} else en = en > ref_blocks[j][2]? en : ref_blocks[j][2];
|
||||||
}
|
}
|
||||||
l_cov += en - st;
|
l_cov += en - st;
|
||||||
|
rst[1][i] = l_cov;
|
||||||
rst[2][i] = (100.0 * (l_cov / ref_len)).toFixed(2) + '%';
|
rst[2][i] = (100.0 * (l_cov / ref_len)).toFixed(2) + '%';
|
||||||
rst[3][i] = (100.0 * (qcov / asm_len)).toFixed(2) + '%';
|
rst[4][i] = (100.0 * (qcov / asm_len)).toFixed(2) + '%';
|
||||||
|
|
||||||
|
// compute cov1 and cov2+ lengths; see paf_call() for details
|
||||||
|
var c1_ctg = null, c1_start = 0, c1_end = 0, c1_len = 0;
|
||||||
|
for (var j = 0; j < ref_blocks.length; ++j) {
|
||||||
|
if (ref_blocks[j][0] != c1_ctg || ref_blocks[j][1] >= c1_end) {
|
||||||
|
if (c1_end > c1_start)
|
||||||
|
c1_len += c1_end - c1_start;
|
||||||
|
c1_ctg = ref_blocks[j][0], c1_start = ref_blocks[j][1], c1_end = ref_blocks[j][2];
|
||||||
|
} else if (ref_blocks[j][2] > c1_end) { // overlap
|
||||||
|
if (ref_blocks[j][1] > c1_start)
|
||||||
|
c1_len += ref_blocks[j][1] - c1_start;
|
||||||
|
c1_start = c1_end, c1_end = ref_blocks[j][2];
|
||||||
|
} else if (ref_blocks[j][2] > c1_start) { // contained
|
||||||
|
if (ref_blocks[j][1] > c1_start)
|
||||||
|
c1_len += ref_blocks[j][1] - c1_start;
|
||||||
|
c1_start = ref_blocks[j][2];
|
||||||
|
}
|
||||||
|
//print(ref_blocks[j][0], ref_blocks[j][1], ref_blocks[j][2], c1_start, c1_end, c1_len);
|
||||||
|
}
|
||||||
|
if (c1_end > c1_start)
|
||||||
|
c1_len += c1_end - c1_start;
|
||||||
|
rst[3][i] = (100 * (l_cov - c1_len) / l_cov).toFixed(2) + '%';
|
||||||
|
|
||||||
// compute NGA50
|
// compute NGA50
|
||||||
rst[4][i] = N50(qblock_len, ref_len, 0.5);
|
rst[7][i] = N50(qblock_len, ref_len, 0.5);
|
||||||
|
|
||||||
// compute break points
|
// compute break points
|
||||||
rst[5][i] = n_breaks;
|
rst[8][i] = n_breaks;
|
||||||
rst[6][i] = count_bp(bp, 500, 0);
|
rst[9][i] = count_bp(bp, 500, 0);
|
||||||
rst[7][i] = count_bp(bp, 500, 10000);
|
rst[10][i] = count_bp(bp, 500, 10000);
|
||||||
|
|
||||||
// nb-plot
|
// nb-plot; NOT USED
|
||||||
|
/*
|
||||||
var qa = [];
|
var qa = [];
|
||||||
for (var qn in qinfo)
|
for (var qn in qinfo)
|
||||||
qa.push([qinfo[qn].len, qinfo[qn].bp]);
|
qa.push([qinfo[qn].len, qinfo[qn].bp]);
|
||||||
|
|
@ -760,6 +788,7 @@ function paf_asmstat(args)
|
||||||
if (next_quantile >= 1.0) break;
|
if (next_quantile >= 1.0) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
buf.destroy();
|
buf.destroy();
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue