diff --git a/misc/mapstat.js b/misc/mapstat.js new file mode 100644 index 0000000..8c835be --- /dev/null +++ b/misc/mapstat.js @@ -0,0 +1,128 @@ +var buf = new Bytes(); +var file = arguments.length? new File(arguments[0]) : new File(); +var re = /(\d+)([MIDSHNX=])/g; + +var lineno = 0, n_pri = 0, n_2nd = 0, n_seq = 0, n_cigar_64k = 0, l_tot = 0, l_cov = 0; +var n_gap = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]; + +function cov_len(regs) +{ + regs.sort(function(a,b) {return a[0]-b[0]}); + var st = regs[0][0], en = regs[0][1], l = 0; + for (var i = 1; i < regs.length; ++i) { + if (regs[i][0] < en) + en = en > regs[i][1]? en : regs[i][1]; + else l += en - st, st = regs[i][0], en = regs[i][1]; + } + l += en - st; + return l; +} + +var last = null, last_qlen = null, regs = []; +while (file.readline(buf) >= 0) { + var line = buf.toString(); + ++lineno; + if (line.charAt(0) != '@') { + var t = line.split("\t", 12); + var m, cigar = null, is_pri = false, is_sam = false, is_rev = false; + var atlen = null, aqlen, qs, qe, mapq, ori_qlen; + if (t[4] == '+' || t[4] == '-') { // PAF + if (!/\ts2:i:\d+/.test(line)) { + ++n_2nd; + continue; + } + if ((m = /\tcg:Z:(\S+)/.exec(line)) != null) + cigar = m[1]; + if (cigar == null) { + warn("WARNING: no CIGAR at line " + lineno); + continue; + } + qs = parseInt(t[2]), qe = parseInt(t[3]); + aqlen = qe - qs; + atlen = parseInt(t[8]) - parseInt(t[7]); + mapq = parseInt(t[11]); + ori_qlen = parseInt(t[1]); + } else { // SAM + var flag = parseInt(t[1]); + if (flag & 4) continue; + if (flag & 0x100) { + ++n_2nd; + continue; + } + cigar = t[5]; + rs = parseInt(t[3]) - 1; + mapq = parseInt(t[4]); + aqlen = t[9].length; + is_sam = true; + is_rev = !!(flag&0x10); + } + ++n_pri; + if (last != t[0]) { + if (last != null) { + l_tot += last_qlen; + l_cov += cov_len(regs); + } + regs = []; + ++n_seq, last = t[0]; + } + var M = 0, tl = 0, ql = 0, clip = [0, 0], n_cigar = 0, sclip = 0; + while ((m = re.exec(cigar)) != null) { + var l = parseInt(m[1]); + ++n_cigar; + if (m[2] == 'M' || m[2] == '=' || m[2] == 'X') { + tl += l, ql += l, M += l; + } else if (m[2] == 'I' || m[2] == 'D') { + var type; + if (l < 50) type = 0; + else if (l < 100) type = 1; + else if (l < 300) type = 2; + else if (l < 400) type = 3; + else if (l < 1000) type = 4; + else type = 5; + if (m[2] == 'I') ql += l, ++n_gap[0][type]; + else tl += l, ++n_gap[1][type]; + } else if (m[2] == 'N') { + tl += l; + } else if (m[2] == 'S') { + clip[M == 0? 0 : 1] = l, sclip += l; + } else if (m[2] == 'H') { + clip[M == 0? 0 : 1] = l; + } + } + if (n_cigar > 65535) ++n_cigar_64k; + if (ql + sclip != aqlen) + warn("WARNING: aligned query length is inconsistent with CIGAR at line " + lineno); + if (atlen != null && atlen != tl) + warn("WARNING: aligned reference length is inconsistent with CIGAR at line " + lineno); + if (is_sam) { + qs = clip[is_rev? 1 : 0], qe = qs + ql; + ori_qlen = clip[0] + ql + clip[1]; + } + regs.push([qs, qe]); + last_qlen = ori_qlen; + } +} +l_tot += last_qlen; +l_cov += cov_len(regs); + +print("Number of mapped sequences: " + n_seq); +print("Number of primary alignments: " + n_pri); +print("Number of secondary alignments: " + n_2nd); +print("Number of primary alignments with >65535 CIGAR operations: " + n_cigar_64k); +print("Number of bases in mapped sequences: " + l_tot); +print("Number of mapped bases: " + l_cov); +print("Number of insertions in [0,50): " + n_gap[0][0]); +print("Number of insertions in [50,100): " + n_gap[0][1]); +print("Number of insertions in [100,300): " + n_gap[0][2]); +print("Number of insertions in [300,400): " + n_gap[0][3]); +print("Number of insertions in [400,1000): " + n_gap[0][4]); +print("Number of insertions in [1000,inf): " + n_gap[0][5]); +print("Number of deletions in [0,50): " + n_gap[1][0]); +print("Number of deletions in [50,100): " + n_gap[1][1]); +print("Number of deletions in [100,300): " + n_gap[1][2]); +print("Number of deletions in [300,400): " + n_gap[1][3]); +print("Number of deletions in [400,1000): " + n_gap[1][4]); +print("Number of deletions in [1000,inf): " + n_gap[1][5]); + +file.close(); +buf.destroy();