r858: bugfix in PE alt alignments

Alignments found with mate rescue are not flagged with "is_alt"
This commit is contained in:
Heng Li 2014-09-18 10:47:21 -04:00
parent b9026ecafd
commit dd1c6619d7
4 changed files with 32 additions and 47 deletions

View File

@ -520,7 +520,7 @@ function read_ALT_sam(fn)
return idx; return idx;
} }
function bwa_genalt(args) function bwa_altgen(args)
{ {
var c, opt = { a:1, b:4, o:6, e:1, verbose:3 }; var c, opt = { a:1, b:4, o:6, e:1, verbose:3 };
@ -529,7 +529,7 @@ function bwa_genalt(args)
} }
if (args.length == getopt.ind) { if (args.length == getopt.ind) {
print("Usage: k8 bwa-helper.js genalt <alt.sam> [aln.sam]"); print("Usage: k8 bwa-helper.js altgen <alt.sam> [aln.sam]");
exit(1); exit(1);
} }
@ -690,22 +690,16 @@ function bwa_genalt(args)
} }
// This is in effect a simplified version of bwa_genalt(). // This is in effect a simplified version of bwa_genalt().
function bwa_postalt(args) function bwa_altlift(args)
{ {
var c, idx = {}, opt = { verbose:3, min_pa:0.8 }; if (args.length == 0) {
while ((c = getopt(args, 'v:r:a:')) != null) { print("Usage: k8 bwa-helper.js altlift <alt-to-ref.sam> [aln.sam]");
if (c == 'v') opt.verbose = parseInt(getopt.arg);
else if (c == 'r') opt.min_pa = parseFloat(getopt.arg);
else if (c == 'a') idx = read_ALT_sam(getopt.arg);
}
if (args.length == getopt.ind) {
print("Usage: k8 bwa-helper.js postalt [-r "+opt.min_pa+"] [-a alt.sam] [aln.sam]");
exit(1); exit(1);
} }
var idx = read_ALT_sam(args[0]);
// process SAM // process SAM
var file = args.length - getopt.ind >= 1? new File(args[getopt.ind]) : new File(); var file = args.length >= 2? new File(args[1]) : new File();
var buf = new Bytes(); var buf = new Bytes();
while (file.readline(buf) >= 0) { while (file.readline(buf) >= 0) {
var m, line = buf.toString(); var m, line = buf.toString();
@ -720,36 +714,27 @@ function bwa_postalt(args)
var h = parse_hit([t[2], ((flag&16)?'-':'+') + t[3], t[5], NM], opt); var h = parse_hit([t[2], ((flag&16)?'-':'+') + t[3], t[5], NM], opt);
// lift mapping positions to coordinates on the primary assembly // lift mapping positions to coordinates on the primary assembly
{ var a = null;
var a = null; if (idx[h.ctg] != null)
if (idx[h.ctg] != null) a = idx[h.ctg](h.start, h.end);
a = idx[h.ctg](h.start, h.end); if (a == null) a = [];
if (a == null) a = [];
// find the approximate position on the primary assembly // find the approximate position on the primary assembly
var lifted = []; var lifted = [];
for (var j = 0; j < a.length; ++j) { for (var j = 0; j < a.length; ++j) {
var s, e; var s, e;
if (!a[j][4]) { // ALT is mapped to the forward strand of the primary assembly if (!a[j][4]) { // ALT is mapped to the forward strand of the primary assembly
s = cigar2pos(a[j][6], h.start); s = cigar2pos(a[j][6], h.start);
e = cigar2pos(a[j][6], h.end - 1) + 1; e = cigar2pos(a[j][6], h.end - 1) + 1;
} else { } else {
s = cigar2pos(a[j][6], a[j][2] - h.end); s = cigar2pos(a[j][6], a[j][2] - h.end);
e = cigar2pos(a[j][6], a[j][2] - h.start - 1) + 1; e = cigar2pos(a[j][6], a[j][2] - h.start - 1) + 1;
}
if (s < 0 || e < 0) continue; // read is mapped to clippings in the ALT-to-chr alignment
s += a[j][5]; e += a[j][5];
lifted.push([a[j][3], (h.rev!=a[j][4]), s, e]);
} }
h.lifted = lifted; if (s < 0 || e < 0) continue; // read is mapped to clippings in the ALT-to-chr alignment
} s += a[j][5]; e += a[j][5];
lifted.push([a[j][3], (h.rev!=a[j][4]), s, e]);
// update mapQ if necessary
var ori_mapQ = null;
if ((m = /\tpa:f:(\d+\.\d+)/.exec(line)) != null) {
if (parseFloat(m[1]) < opt.min_pa)
ori_mapQ = t[4], t[4] = 0;
} }
h.lifted = lifted;
// generate lifted_str // generate lifted_str
if (h.lifted && h.lifted.length) { if (h.lifted && h.lifted.length) {
@ -761,7 +746,6 @@ function bwa_postalt(args)
} else h.lifted_str = null; } else h.lifted_str = null;
// print // print
if (ori_mapQ != null) t.push("om:i:"+ori_mapQ);
if (h.lifted_str) t.push("lt:Z:" + h.lifted_str); if (h.lifted_str) t.push("lt:Z:" + h.lifted_str);
print(t.join("\t")); print(t.join("\t"));
} }
@ -777,8 +761,8 @@ function main(args)
{ {
if (args.length == 0) { if (args.length == 0) {
print("\nUsage: k8 bwa-helper.js <command> [arguments]\n"); print("\nUsage: k8 bwa-helper.js <command> [arguments]\n");
print("Commands: postalt post process ALT-aware alignments"); print("Commands: altlift add lt tag to show lifted position on the primary assembly");
print(" genalt generate ALT alignments for ALT-unaware alignments"); print(" altgen generate ALT alignments for ALT-unaware alignments\n");
print(" sam2pas convert SAM to pairwise alignment summary format (PAS)"); print(" sam2pas convert SAM to pairwise alignment summary format (PAS)");
print(" pas2reg extract covered regions"); print(" pas2reg extract covered regions");
print(" reg2cut regions to extract for the 2nd round bwa-mem"); print(" reg2cut regions to extract for the 2nd round bwa-mem");
@ -795,8 +779,8 @@ function main(args)
else if (cmd == 'markovlp') bwa_markOvlp(args); else if (cmd == 'markovlp') bwa_markOvlp(args);
else if (cmd == 'pas2reg') bwa_pas2reg(args); else if (cmd == 'pas2reg') bwa_pas2reg(args);
else if (cmd == 'reg2cut') bwa_reg2cut(args); else if (cmd == 'reg2cut') bwa_reg2cut(args);
else if (cmd == 'genalt') bwa_genalt(args); else if (cmd == 'altgen' || cmd == 'genalt') bwa_alt(args);
else if (cmd == 'postalt') bwa_postalt(args); else if (cmd == 'altlift') bwa_altlift(args);
else if (cmd == 'shortname') bwa_shortname(args); else if (cmd == 'shortname') bwa_shortname(args);
else warn("Unrecognized command"); else warn("Unrecognized command");
} }

View File

@ -527,7 +527,7 @@ int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id
if (!p->is_alt && p->secondary >= 0 && a[p->secondary].is_alt) if (!p->is_alt && p->secondary >= 0 && a[p->secondary].is_alt)
p->alt_sc = a[p->secondary].score; p->alt_sc = a[p->secondary].score;
} }
if (n_pri > 0 || n_pri != n) { if (n_pri > 0 && n_pri < n) {
ks_introsort(mem_ars_hash2, n, a); ks_introsort(mem_ars_hash2, n, a);
for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1; for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1;
mem_mark_primary_se_core(opt, n_pri, a, &z); mem_mark_primary_se_core(opt, n_pri, a, &z);

View File

@ -151,6 +151,7 @@ int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
memset(&b, 0, sizeof(mem_alnreg_t)); memset(&b, 0, sizeof(mem_alnreg_t));
if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0
b.rid = a->rid; b.rid = a->rid;
b.is_alt = a->is_alt;
b.qb = is_rev? l_ms - (aln.qe + 1) : aln.qb; b.qb = is_rev? l_ms - (aln.qe + 1) : aln.qb;
b.qe = is_rev? l_ms - aln.qb : aln.qe + 1; b.qe = is_rev? l_ms - aln.qb : aln.qe + 1;
b.rb = is_rev? (l_pac<<1) - (rb + aln.te + 1) : rb + aln.tb; b.rb = is_rev? (l_pac<<1) - (rb + aln.te + 1) : rb + aln.tb;

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h" #include "utils.h"
#ifndef PACKAGE_VERSION #ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r855-dirty" #define PACKAGE_VERSION "0.7.10-r858-dirty"
#endif #endif
int bwa_fa2pac(int argc, char *argv[]); int bwa_fa2pac(int argc, char *argv[]);