r1022: check INDEL lengths in simple cases

This commit is contained in:
Heng Li 2021-04-11 22:07:00 -04:00
parent 86b716448c
commit cd9ccfa069
1 changed files with 29 additions and 6 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/env k8 #!/usr/bin/env k8
var paftools_version = '2.18-r1018-dirty'; var paftools_version = '2.18-r1022-dirty';
/***************************** /*****************************
***** Library functions ***** ***** Library functions *****
@ -2742,11 +2742,13 @@ function _paf_get_alen(t)
function paf_sveval(args) function paf_sveval(args)
{ {
var c, min_flt = 30, min_size = 50, max_size = 100000, win_size = 500, print_err = false, print_match = false, bed_fn = null; var c, min_flt = 30, min_size = 50, max_size = 100000, win_size = 500, print_err = false, print_match = false, bed_fn = null;
while ((c = getopt(args, "f:i:x:w:er:p")) != null) { var len_diff_ratio = 0.5;
while ((c = getopt(args, "f:i:x:w:er:pd:")) != null) {
if (c == 'f') min_flt = paf_parseNum(getopt.arg); if (c == 'f') min_flt = paf_parseNum(getopt.arg);
else if (c == 'i') min_size = paf_parseNum(getopt.arg); else if (c == 'i') min_size = paf_parseNum(getopt.arg);
else if (c == 'x') max_size = paf_parseNum(getopt.arg); else if (c == 'x') max_size = paf_parseNum(getopt.arg);
else if (c == 'w') win_size = paf_parseNum(getopt.arg); else if (c == 'w') win_size = paf_parseNum(getopt.arg);
else if (c == 'd') len_diff_ratio = parseFloat(getopt.arg);
else if (c == 'r') bed_fn = getopt.arg; else if (c == 'r') bed_fn = getopt.arg;
else if (c == 'e') print_err = true; else if (c == 'e') print_err = true;
else if (c == 'p') print_match = true; else if (c == 'p') print_match = true;
@ -2759,6 +2761,7 @@ function paf_sveval(args)
print(" -i INT min SV length [" + min_size + "]"); print(" -i INT min SV length [" + min_size + "]");
print(" -x INT max SV length [" + max_size + "]"); print(" -x INT max SV length [" + max_size + "]");
print(" -w INT fuzzy windown size [" + win_size + "]"); print(" -w INT fuzzy windown size [" + win_size + "]");
print(" -d FLOAT max allele diff if there is a single allele in the window [" + len_diff_ratio + "]");
print(" -e print errors"); print(" -e print errors");
return; return;
} }
@ -2831,15 +2834,35 @@ function paf_sveval(args)
var ws = win_size + (a1[i][3]>>1); var ws = win_size + (a1[i][3]>>1);
var st = a1[i][0] > ws? a1[i][0] - ws : 0; var st = a1[i][0] > ws? a1[i][0] - ws : 0;
b = Interval.find_ovlp(a0, st, a1[i][1] + ws); b = Interval.find_ovlp(a0, st, a1[i][1] + ws);
var match = false; var n_ins = 0, n_del = 0, sv_del = null, sv_ins = null;
for (var j = 0; j < b.length; ++j) { for (var j = 0; j < b.length; ++j) {
if (b[j][2] * a1[i][2] > 0) if (b[j][2] < 0) ++n_del, sv_del = -b[j][2];
match = true; else if (b[j][2] > 0) ++n_ins, sv_ins = b[j][2];
if (print_match) if (print_match)
print("MA", x, a1[i].slice(0, 3).join("\t"), b[j].slice(0, 3).join("\t")); print("MA", x, a1[i].slice(0, 3).join("\t"), b[j].slice(0, 3).join("\t"));
} }
var match = false;
if (a1[i][2] > 0) { // insertion
if (n_ins == 1) {
var diff = sv_ins - a1[i][3];
if (diff < 0) diff = -diff;
if (diff < min_size || diff / a1[i][3] < len_diff_ratio)
match = true;
} else if (n_ins > 1) match = true; // multiple insertions; ambiguous
} else if (a1[i][2] < 0) {
if (n_del == 1) { // deletion
var diff = sv_del - a1[i][3];
if (diff < 0) diff = -diff;
if (diff < min_size || diff / a1[i][3] < len_diff_ratio)
match = true;
} else if (n_del > 1) match = true; // multiple deletions; ambiguous
}
if (match) ++m; if (match) ++m;
else if (print_err) print(label, x, a1[i].slice(0, 3).join("\t")); else if (print_err) {
if ((a1[i][2] > 0 && n_ins > 0) || (a1[i][2] < 0 && n_del > 0))
print("MM", x, a1[i].slice(0, 3).join("\t"));
print(label, x, a1[i].slice(0, 3).join("\t"));
}
} }
} }
return [n, m]; return [n, m];