From 9d049f0562b5faedeeb91834505d9e942f690e28 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 5 Aug 2021 12:41:21 -0400 Subject: [PATCH] added pafcmp --- misc/paftools.js | 113 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/misc/paftools.js b/misc/paftools.js index 1a6b8f1..c2c9ec1 100755 --- a/misc/paftools.js +++ b/misc/paftools.js @@ -2947,6 +2947,117 @@ function paf_vcfsel(args) buf.destroy(); } +function paf_pafcmp(args) +{ + var c, opt = { min_len:5000, min_mapq:5, min_ovlp:0.5 }; + while ((c = getopt(args, "c:")) != null) { + } + + var buf = new Bytes(); + if (args.length - getopt.ind < 2) { + print("Usage: paftools.js pafcmp [options] "); + return 1; + } + + var eval = { n_base:0, n_test:0, n_out_high:0, n_out_low:0, n_hit:0, n_wrong:0, n_miss:0 }; + + function process_base(base, a) { + if (a.length != 1) return; + for (var i = 1; i < 4; ++i) + a[0][i] = parseInt(a[0][i]); + for (var i = 6; i < 12; ++i) + a[0][i] = parseInt(a[0][i]); + if (a[0][1] < opt.min_len) return; + if (a[0][11] >= opt.min_mapq) ++eval.n_base; + base[a[0][0]] = [a[0][5], a[0][7], a[0][8], a[0][11], 0, 0]; + } + + var file = new File(args[getopt.ind]); + warn("Reading " + args[getopt.ind] + "..."); + var a = [], base = {}; + while (file.readline(buf) >= 0) { + var line = buf.toString(); + var t = line.split("\t"); + if (/\ttp:A:S/.test(line)) continue; + if (a.length > 0 && a[0][0] != t[0]) { + process_base(base, a); + a = []; + } + a.push(t); + } + process_base(base, a); + file.close(); + + function process_test(base, a) { + for (var i = 1; i < 4; ++i) + a[0][i] = parseInt(a[0][i]); + for (var i = 6; i < 12; ++i) + a[0][i] = parseInt(a[0][i]); + if (a[0][1] < opt.min_len) return; + if (a[0][11] >= opt.min_mapq) ++eval.n_test; + var c = [a[0][5], a[0][7], a[0][8], a[0][11]]; + if (base[a[0][0]] == null) { + if (c[3] >= opt.min_mapq) ++opt.n_out_high; + else ++opt.n_out_low; + } else { + var b = base[a[0][0]]; + var inter = 0, union = (b[2] - b[1]) + (c[2] - c[1]); + if (b[0] == c[0]) { // same chr + if (b[1] < c[1]) { + if (b[2] > c[1]) + inter = b[2] - c[1], union = c[2] - b[1]; + } else { // c[1] < b[1] + if (c[2] > b[1]) + inter = c[2] - b[1], union = b[2] - c[1]; + } + } + if (inter >= union * opt.min_ovlp) { + if (b[3] >= opt.min_mapq) ++eval.n_hit; + ++b[4]; + } else { + if (b[3] >= opt.min_mapq) { + print("W", a[0][0], b.slice(0, 4).join("\t"), c.join("\t")); + ++eval.n_wrong; + } + ++b[5]; + } + } + } + + file = new File(args[getopt.ind+1]); + warn("Reading " + args[getopt.ind+1] + "..."); + a = []; + while (file.readline(buf) >= 0) { + var line = buf.toString(); + var t = line.split("\t"); + if (/\ttp:A:S/.test(line)) continue; + if (a.length > 0 && a[0][0] != t[0]) { + process_test(base, a); + a = []; + } + a.push(t); + } + process_test(base, a); + file.close(); + + for (var r in base) { + var b = base[r]; + if (b[3] >= opt.min_mapq && b[4] == 0 && b[5] == 0) { + ++eval.n_miss; + print("M", r, b.slice(0, 4).join("\t")); + } + } + + print("X", eval.n_base + " base alignments with mapQ>=" + opt.min_mapq); +// print("X", eval.n_test + " test alignments with mapQ>=" + opt.min_mapq); + print("X", eval.n_hit + " base alignments correctly mapped by test"); + print("X", eval.n_wrong + " wrong test alignment"); + print("X", eval.n_miss + " base alignments missing"); + print("X", eval.n_out_high + " additional test alignments with mapQ>=" + opt.min_mapq); + + buf.destroy(); +} + /************************* ***** main function ***** *************************/ @@ -2974,6 +3085,7 @@ function main(args) print(" version print paftools.js version"); print(""); print(" mapeval evaluate mapping accuracy using mason2/PBSIM-simulated FASTQ"); + print(" pafcmp compare two PAF files"); print(" mason2fq convert mason2-simulated SAM to FASTQ"); print(" pbsim2fq convert PBSIM-simulated MAF to FASTQ"); print(" junceval evaluate splice junction consistency with known annotations"); @@ -2995,6 +3107,7 @@ function main(args) else if (cmd == 'vcfpair') paf_vcfpair(args); else if (cmd == 'call') paf_call(args); else if (cmd == 'mapeval') paf_mapeval(args); + else if (cmd == 'pafcmp') paf_pafcmp(args); else if (cmd == 'bedcov') paf_bedcov(args); else if (cmd == 'mason2fq') paf_mason2fq(args); else if (cmd == 'pbsim2fq') paf_pbsim2fq(args);