From f10dff78dc20edd856e36a80360df76631b9cb8e Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 14 Apr 2020 15:52:36 -0400 Subject: [PATCH] r981: asmgene to check duplicate genes --- misc/paftools.js | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/misc/paftools.js b/misc/paftools.js index 2605fe3..f864946 100755 --- a/misc/paftools.js +++ b/misc/paftools.js @@ -1,6 +1,6 @@ #!/usr/bin/env k8 -var paftools_version = '2.17-r980-dirty'; +var paftools_version = '2.17-r981-dirty'; /***************************** ***** Library functions ***** @@ -905,14 +905,14 @@ function paf_asmgene(args) gene_nr[gene_list[last][0]] = 1; // count and print - var col1 = ["full_sgl", "full_dup", "frag", "part50+", "part10+", "part10-"]; + var col1 = ["full_sgl", "full_dup", "frag", "part50+", "part10+", "part10-", "dup_cnt", "dup_sum"]; var rst = []; for (var k = 0; k < col1.length; ++k) { rst[k] = []; for (var i = 0; i < n_fn; ++i) rst[k][i] = 0; } - for (var g in gene) { + for (var g in gene) { // count single-copy genes if (gene[g][0] == null || gene[g][0][0] != 1) continue; if (gene_nr[g] == null) continue; if (auto_only && /^(chr)?[XY]$/.test(refpos[g][2])) continue; @@ -920,8 +920,9 @@ function paf_asmgene(args) if (gene[g][i] == null) { rst[5][i]++; if (print_err) print('M', header[i], refpos[g].join("\t")); - } else if (gene[g][i][0] == 1) rst[0][i]++; - else if (gene[g][i][0] > 1) { + } else if (gene[g][i][0] == 1) { + rst[0][i]++; + } else if (gene[g][i][0] > 1) { rst[1][i]++; if (print_err) print('D', header[i], refpos[g].join("\t")); } else if (gene[g][i][1] >= opt.min_cov) { @@ -939,6 +940,17 @@ function paf_asmgene(args) } } } + for (var g in gene) { // count multi-copy genes + if (gene[g][0] == null || gene[g][0][0] == 1) continue; + if (gene_nr[g] == null) continue; + if (auto_only && /^(chr)?[XY]$/.test(refpos[g][2])) continue; + for (var i = 0; i < n_fn; ++i) { + if (gene[g][i] != null && gene[g][i][0] > 1) { + rst[6][i]++; + rst[7][i] += gene[g][i][0]; + } + } + } print('H', 'Metric', header.join("\t")); for (var k = 0; k < rst.length; ++k) { print('X', col1[k], rst[k].join("\t"));