Merge branch 'master' into master_fixes
Conflicts: bwamem.c bwamem_pair.c example.c
This commit is contained in:
commit
cca27c1ef5
6
bwa.1
6
bwa.1
|
|
@ -1,4 +1,4 @@
|
||||||
.TH bwa 1 "9 March 2013" "bwa-0.7.2" "Bioinformatics tools"
|
.TH bwa 1 "13 March 2013" "bwa-0.7.3" "Bioinformatics tools"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
.PP
|
.PP
|
||||||
bwa - Burrows-Wheeler Alignment Tool
|
bwa - Burrows-Wheeler Alignment Tool
|
||||||
|
|
@ -574,11 +574,13 @@ XM Number of mismatches in the alignment
|
||||||
XO Number of gap opens
|
XO Number of gap opens
|
||||||
XG Number of gap extentions
|
XG Number of gap extentions
|
||||||
XT Type: Unique/Repeat/N/Mate-sw
|
XT Type: Unique/Repeat/N/Mate-sw
|
||||||
XA Alternative hits; format: (chr,pos,CIGAR,NM;)*
|
XA Alternative hits; format: /(chr,pos,CIGAR,NM;)*/
|
||||||
_
|
_
|
||||||
XS Suboptimal alignment score
|
XS Suboptimal alignment score
|
||||||
XF Support from forward/reverse alignment
|
XF Support from forward/reverse alignment
|
||||||
XE Number of supporting seeds
|
XE Number of supporting seeds
|
||||||
|
_
|
||||||
|
XP Alt primary hits; format: /(chr,pos,CIGAR;mapQ,NM;)+/
|
||||||
.TE
|
.TE
|
||||||
|
|
||||||
.PP
|
.PP
|
||||||
|
|
|
||||||
224
bwamem.c
224
bwamem.c
|
|
@ -607,83 +607,73 @@ static inline int infer_bw(int l1, int l2, int score, int a, int q, int r)
|
||||||
return w;
|
return w;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, const bwahit_t *p_, int is_hard, const bwahit_t *m)
|
static inline int get_rlen(int n_cigar, const uint32_t *cigar)
|
||||||
{
|
{
|
||||||
#define is_mapped(x) ((x)->rb >= 0 && (x)->rb < (x)->re && (x)->re <= bns->l_pac<<1)
|
int k, l;
|
||||||
int score, n_cigar, is_rev = 0, rid, mid, copy_mate = 0, NM = -1;
|
for (k = l = 0; k < n_cigar; ++k) {
|
||||||
uint32_t *cigar = 0;
|
int op = cigar[k]&0xf;
|
||||||
int64_t pos = -1;
|
if (op == 0 || op == 2)
|
||||||
bwahit_t ptmp, *p = &ptmp;
|
l += cigar[k]>>4;
|
||||||
|
}
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
if (!p_) { // in this case, generate an unmapped alignment
|
void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_)
|
||||||
memset(&ptmp, 0, sizeof(bwahit_t));
|
{
|
||||||
ptmp.rb = ptmp.re = -1;
|
int i;
|
||||||
} else ptmp = *p_;
|
mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
|
||||||
p->flag |= m? 1 : 0; // is paired in sequencing
|
|
||||||
p->flag |= !is_mapped(p)? 4 : 0; // is mapped
|
if (m_) mtmp = *m_, m = &mtmp;
|
||||||
p->flag |= m && !is_mapped(m)? 8 : 0; // is mate mapped
|
// set flag
|
||||||
if (m && !is_mapped(p) && is_mapped(m)) {
|
p->flag |= m? 0x1 : 0; // is paired in sequencing
|
||||||
p->rb = m->rb; p->re = m->re; p->qb = 0; p->qe = s->l_seq;
|
p->flag |= p->rid < 0? 0x4 : 0; // is mapped
|
||||||
copy_mate = 1;
|
p->flag |= m && m->rid < 0? 0x8 : 0; // is mate mapped
|
||||||
|
if (p->rid < 0 && m && m->rid >= 0) // copy mate to alignment
|
||||||
|
p->rid = m->rid, p->pos = m->pos, p->is_rev = m->is_rev, p->n_cigar = 0;
|
||||||
|
if (m && m->rid < 0 && p->rid >= 0) // copy alignment to mate
|
||||||
|
m->rid = p->rid, m->pos = p->pos, m->is_rev = p->is_rev, m->n_cigar = 0;
|
||||||
|
p->flag |= p->is_rev? 0x10 : 0; // is on the reverse strand
|
||||||
|
p->flag |= m && m->is_rev? 0x20 : 0; // is mate on the reverse strand
|
||||||
|
|
||||||
|
// print up to CIGAR
|
||||||
|
kputs(s->name, str); kputc('\t', str); // QNAME
|
||||||
|
kputw((p->flag&0xffff) | (p->flag&0x10000? 0x100 : 0), str); kputc('\t', str); // FLAG
|
||||||
|
if (p->rid >= 0) { // with coordinate
|
||||||
|
kputs(bns->anns[p->rid].name, str); kputc('\t', str); // RNAME
|
||||||
|
kputl(p->pos + 1, str); kputc('\t', str); // POS
|
||||||
|
kputw(p->mapq, str); kputc('\t', str); // MAPQ
|
||||||
|
if (p->n_cigar) { // aligned
|
||||||
|
for (i = 0; i < p->n_cigar; ++i) {
|
||||||
|
kputw(p->cigar[i]>>4, str); kputc("MIDSH"[p->cigar[i]&0xf], str);
|
||||||
}
|
}
|
||||||
p->flag |= p->rb >= bns->l_pac? 0x10 : 0; // is reverse strand
|
} else kputc('*', str); // having a coordinate but unaligned (e.g. when copy_mate is true)
|
||||||
p->flag |= m && m->rb >= bns->l_pac? 0x20 : 0; // is mate on reverse strand
|
} else kputsn("*\t0\t0\t*", 7, str); // without coordinte
|
||||||
if (is_mapped(p) && m && !is_mapped(m) && (p->flag&0x10)) p->flag |= 0x20; // if mate is unmapped, it takes the strand of the current read
|
|
||||||
kputs(s->name, str); kputc('\t', str);
|
|
||||||
if (is_mapped(p)) { // has a coordinate, no matter whether it is mapped or copied from the mate
|
|
||||||
int sam_flag = p->flag&0xff; // the flag that will be outputed to SAM; it is not always the same as p->flag
|
|
||||||
if (p->flag&0x10000) sam_flag |= 0x100;
|
|
||||||
if (!copy_mate) {
|
|
||||||
int w2;
|
|
||||||
w2 = infer_bw(p->qe - p->qb, p->re - p->rb, p->score, mat[0], q, r);
|
|
||||||
w2 = w2 < w? w2 : w;
|
|
||||||
cigar = bwa_gen_cigar(mat, q, r, w2, bns->l_pac, pac, p->qe - p->qb, (uint8_t*)&s->seq[p->qb], p->rb, p->re, &score, &n_cigar, &NM);
|
|
||||||
p->flag |= n_cigar == 0? 4 : 0; // FIXME: check why this may happen (this has already happened)
|
|
||||||
} else n_cigar = 0, cigar = 0;
|
|
||||||
pos = bns_depos(bns, p->rb < bns->l_pac? p->rb : p->re - 1, &is_rev);
|
|
||||||
bns_cnt_ambi(bns, pos, p->re - p->rb, &rid);
|
|
||||||
kputw(sam_flag, str); kputc('\t', str);
|
|
||||||
kputs(bns->anns[rid].name, str); kputc('\t', str); kputuw(pos - bns->anns[rid].offset + 1, str); kputc('\t', str);
|
|
||||||
kputw(p->qual, str); kputc('\t', str);
|
|
||||||
if (n_cigar) {
|
|
||||||
int i, clip5, clip3;
|
|
||||||
clip5 = is_rev? s->l_seq - p->qe : p->qb;
|
|
||||||
clip3 = is_rev? p->qb : s->l_seq - p->qe;
|
|
||||||
if (clip5) { kputw(clip5, str); kputc("SH"[(is_hard!=0)], str); }
|
|
||||||
for (i = 0; i < n_cigar; ++i) {
|
|
||||||
kputw(cigar[i]>>4, str); kputc("MIDSH"[cigar[i]&0xf], str);
|
|
||||||
}
|
|
||||||
if (clip3) { kputw(clip3, str); kputc("SH"[(is_hard!=0)], str); }
|
|
||||||
} else kputc('*', str);
|
|
||||||
} else { // no coordinate
|
|
||||||
kputw(p->flag, str);
|
|
||||||
kputs("\t*\t0\t0\t*", str);
|
|
||||||
rid = -1;
|
|
||||||
}
|
|
||||||
if (m && is_mapped(m)) { // then print mate pos and isize
|
|
||||||
pos = bns_depos(bns, m->rb < bns->l_pac? m->rb : m->re - 1, &is_rev);
|
|
||||||
bns_cnt_ambi(bns, pos, m->re - m->rb, &mid);
|
|
||||||
kputc('\t', str);
|
kputc('\t', str);
|
||||||
if (mid == rid) kputc('=', str);
|
|
||||||
else kputs(bns->anns[mid].name, str);
|
// print the mate position if applicable
|
||||||
kputc('\t', str); kputuw(pos - bns->anns[mid].offset + 1, str);
|
if (m && m->rid >= 0) {
|
||||||
|
if (p->rid == m->rid) kputc('=', str);
|
||||||
|
else kputs(bns->anns[m->rid].name, str);
|
||||||
kputc('\t', str);
|
kputc('\t', str);
|
||||||
if (mid == rid) {
|
kputl(m->pos + 1, str); kputc('\t', str);
|
||||||
int64_t p0 = p->rb < bns->l_pac? p->rb : (bns->l_pac<<1) - 1 - p->rb;
|
if (p->rid == m->rid) {
|
||||||
int64_t p1 = m->rb < bns->l_pac? m->rb : (bns->l_pac<<1) - 1 - m->rb;
|
int64_t p0 = p->pos + (p->is_rev? get_rlen(p->n_cigar, p->cigar) - 1 : 0);
|
||||||
kputw(-(p0 - p1 + (p0 > p1? 1 : p0 < p1? -1 : 0)), str);
|
int64_t p1 = m->pos + (m->is_rev? get_rlen(m->n_cigar, m->cigar) - 1 : 0);
|
||||||
} else kputw(0, str);
|
if (m->n_cigar == 0 || p->n_cigar == 0) kputc('0', str);
|
||||||
|
else kputl(-(p0 - p1 + (p0 > p1? 1 : p0 < p1? -1 : 0)), str);
|
||||||
|
} else kputc('0', str);
|
||||||
|
} else kputsn("*\t0\t0", 5, str);
|
||||||
kputc('\t', str);
|
kputc('\t', str);
|
||||||
} else if (m && is_mapped(p)) { // then copy the position
|
|
||||||
kputsn("\t=\t", 3, str);
|
// print SEQ and QUAL
|
||||||
kputuw(pos - bns->anns[rid].offset + 1, str);
|
if (p->flag & 0x100) { // for secondary alignments, don't write SEQ and QUAL
|
||||||
kputsn("\t0\t", 3, str);
|
|
||||||
} else kputsn("\t*\t0\t0\t", 7, str);
|
|
||||||
if (p->flag&0x100) { // for secondary alignments, don't write SEQ and QUAL
|
|
||||||
kputsn("*\t*", 3, str);
|
kputsn("*\t*", 3, str);
|
||||||
} else if (!(p->flag&0x10)) { // print SEQ and QUAL, the forward strand
|
} else if (!p->is_rev) { // the forward strand
|
||||||
int i, qb = 0, qe = s->l_seq;
|
int i, qb = 0, qe = s->l_seq;
|
||||||
if (!(p->flag&4) && is_hard) qb = p->qb, qe = p->qe;
|
if (p->n_cigar) {
|
||||||
|
if ((p->cigar[0]&0xf) == 4) qb += p->cigar[0]>>4;
|
||||||
|
if ((p->cigar[p->n_cigar-1]&0xf) == 4) qe -= p->cigar[p->n_cigar-1]>>4;
|
||||||
|
}
|
||||||
ks_resize(str, str->l + (qe - qb) + 1);
|
ks_resize(str, str->l + (qe - qb) + 1);
|
||||||
for (i = qb; i < qe; ++i) str->s[str->l++] = "ACGTN"[(int)s->seq[i]];
|
for (i = qb; i < qe; ++i) str->s[str->l++] = "ACGTN"[(int)s->seq[i]];
|
||||||
kputc('\t', str);
|
kputc('\t', str);
|
||||||
|
|
@ -694,7 +684,10 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons
|
||||||
} else kputc('*', str);
|
} else kputc('*', str);
|
||||||
} else { // the reverse strand
|
} else { // the reverse strand
|
||||||
int i, qb = 0, qe = s->l_seq;
|
int i, qb = 0, qe = s->l_seq;
|
||||||
if (!(p->flag&4) && is_hard) qb = p->qb, qe = p->qe;
|
if (p->n_cigar) {
|
||||||
|
if ((p->cigar[0]&0xf) == 4) qe -= p->cigar[0]>>4;
|
||||||
|
if ((p->cigar[p->n_cigar-1]&0xf) == 4) qb += p->cigar[p->n_cigar-1]>>4;
|
||||||
|
}
|
||||||
ks_resize(str, str->l + (qe - qb) + 1);
|
ks_resize(str, str->l + (qe - qb) + 1);
|
||||||
for (i = qe-1; i >= qb; --i) str->s[str->l++] = "TGCAN"[(int)s->seq[i]];
|
for (i = qe-1; i >= qb; --i) str->s[str->l++] = "TGCAN"[(int)s->seq[i]];
|
||||||
kputc('\t', str);
|
kputc('\t', str);
|
||||||
|
|
@ -704,14 +697,33 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons
|
||||||
str->s[str->l] = 0;
|
str->s[str->l] = 0;
|
||||||
} else kputc('*', str);
|
} else kputc('*', str);
|
||||||
}
|
}
|
||||||
if (NM >= 0) { kputsn("\tNM:i:", 6, str); kputw(NM, str); }
|
|
||||||
|
// print optional tags
|
||||||
|
if (p->n_cigar) { kputsn("\tNM:i:", 6, str); kputw(p->NM, str); }
|
||||||
if (p->score >= 0) { kputsn("\tAS:i:", 6, str); kputw(p->score, str); }
|
if (p->score >= 0) { kputsn("\tAS:i:", 6, str); kputw(p->score, str); }
|
||||||
if (p->sub >= 0) { kputsn("\tXS:i:", 6, str); kputw(p->sub, str); }
|
if (p->sub >= 0) { kputsn("\tXS:i:", 6, str); kputw(p->sub, str); }
|
||||||
if (bwa_rg_id[0]) { kputsn("\tRG:Z:", 6, str); kputs(bwa_rg_id, str); }
|
if (bwa_rg_id[0]) { kputsn("\tRG:Z:", 6, str); kputs(bwa_rg_id, str); }
|
||||||
|
for (i = 0; i < n; ++i)
|
||||||
|
if (i != which && !(list[i].flag&0x20000)) break; // 0x20000: shadowed multi hit
|
||||||
|
if (i < n) { // there are other primary hits; output them
|
||||||
|
kputsn("\tXP:Z:", 6, str);
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
const mem_aln_t *r = &list[i];
|
||||||
|
int k;
|
||||||
|
if (i == which || (list[i].flag&0x20000)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
|
||||||
|
kputs(bns->anns[r->rid].name, str); kputc(',', str);
|
||||||
|
kputc("+-"[r->is_rev], str);
|
||||||
|
kputl(r->pos+1, str); kputc(',', str);
|
||||||
|
for (k = 0; k < r->n_cigar; ++k) {
|
||||||
|
kputw(r->cigar[k]>>4, str); kputc("MIDSH"[r->cigar[k]&0xf], str);
|
||||||
|
}
|
||||||
|
kputc(',', str); kputw(r->mapq, str);
|
||||||
|
kputc(',', str); kputw(r->NM, str);
|
||||||
|
kputc(';', str);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
|
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
|
||||||
kputc('\n', str);
|
kputc('\n', str);
|
||||||
free(cigar);
|
|
||||||
#undef is_mapped
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************
|
/************************
|
||||||
|
|
@ -734,42 +746,36 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
|
||||||
return mapq;
|
return mapq;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h)
|
void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
|
||||||
{
|
{
|
||||||
h->rb = a->rb; h->re = a->re; h->qb = a->qb; h->qe = a->qe;
|
|
||||||
h->score = a->score;
|
|
||||||
h->sub = a->secondary >= 0? -1 : a->sub > a->csub? a->sub : a->csub;
|
|
||||||
h->qual = 0; // quality unset
|
|
||||||
h->flag = a->secondary >= 0? 0x100 : 0; // only the "secondary" bit is set
|
|
||||||
}
|
|
||||||
|
|
||||||
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const bwahit_t *m)
|
|
||||||
{
|
|
||||||
int k;
|
|
||||||
kstring_t str;
|
kstring_t str;
|
||||||
str.l = str.m = 0; str.s = 0;
|
str.l = str.m = 0; str.s = 0;
|
||||||
if (a->n > 0 && a->a[0].score >= opt->T) {
|
if (a->n > 0 && a->a[0].score >= opt->T) {
|
||||||
int mapq0 = -1;
|
int k;
|
||||||
|
kvec_t(mem_aln_t) aa;
|
||||||
|
kv_init(aa);
|
||||||
for (k = 0; k < a->n; ++k) {
|
for (k = 0; k < a->n; ++k) {
|
||||||
bwahit_t h;
|
|
||||||
mem_alnreg_t *p = &a->a[k];
|
mem_alnreg_t *p = &a->a[k];
|
||||||
|
mem_aln_t *q;
|
||||||
if (p->score < opt->T) continue;
|
if (p->score < opt->T) continue;
|
||||||
if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue;
|
if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue;
|
||||||
if (p->secondary >= 0 && p->score < a->a[p->secondary].score * .5) continue;
|
if (p->secondary >= 0 && p->score < a->a[p->secondary].score * .5) continue;
|
||||||
mem_alnreg2hit(p, &h);
|
q = kv_pushp(mem_aln_t, aa);
|
||||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)s->seq, &h.qb, &h.qe, &h.rb, &h.re);
|
*q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p);
|
||||||
h.flag |= extra_flag;
|
q->flag |= extra_flag | (p->secondary >= 0? 0x100 : 0); // flag secondary
|
||||||
if ((opt->flag&MEM_F_NO_MULTI) && k && p->secondary < 0) h.flag |= 0x10000; // print the sequence, but flag as secondary (for Picard)
|
if (p->secondary >= 0) q->sub = -1; // don't output sub-optimal score
|
||||||
h.qual = p->secondary >= 0? 0 : mem_approx_mapq_se(opt, p);
|
if ((opt->flag&MEM_F_NO_MULTI) && k && p->secondary < 0) q->flag |= 0x10000;
|
||||||
if (k == 0) mapq0 = h.qual;
|
if (k && q->mapq > aa.a[0].mapq) q->mapq = aa.a[0].mapq;
|
||||||
else if (h.qual > mapq0) h.qual = mapq0;
|
|
||||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, p->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
|
|
||||||
}
|
}
|
||||||
|
for (k = 0; k < aa.n; ++k)
|
||||||
|
mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m);
|
||||||
|
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
|
||||||
|
free(aa.a);
|
||||||
} else {
|
} else {
|
||||||
bwahit_t h;
|
mem_aln_t t;
|
||||||
memset(&h, 0, sizeof(bwahit_t));
|
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
|
||||||
h.rb = h.re = -1; h.flag = extra_flag;
|
t.flag |= extra_flag;
|
||||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
|
mem_aln2sam(bns, &str, s, 1, &t, 0, m);
|
||||||
}
|
}
|
||||||
s->sam = str.s;
|
s->sam = str.s;
|
||||||
}
|
}
|
||||||
|
|
@ -816,15 +822,22 @@ mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *
|
||||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
|
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
|
||||||
{
|
{
|
||||||
mem_aln_t a;
|
mem_aln_t a;
|
||||||
int i, w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev;
|
int i, w2, qb, qe, NM, score, is_rev;
|
||||||
int64_t pos, rb = ar->rb, re = ar->re;
|
int64_t pos, rb, re;
|
||||||
uint8_t *query;
|
uint8_t *query;
|
||||||
|
|
||||||
|
memset(&a, 0, sizeof(mem_aln_t));
|
||||||
|
if (ar == 0 || ar->rb < 0 || ar->re < 0) { // generate an unmapped record
|
||||||
|
a.rid = -1; a.pos = -1; a.flag |= 0x4;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
qb = ar->qb, qe = ar->qe;
|
||||||
|
rb = ar->rb, re = ar->re;
|
||||||
query = xmalloc(l_query);
|
query = xmalloc(l_query);
|
||||||
for (i = 0; i < l_query; ++i) // convert to the nt4 encoding
|
for (i = 0; i < l_query; ++i) // convert to the nt4 encoding
|
||||||
query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
|
query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
|
||||||
memset(&a, 0, sizeof(mem_aln_t));
|
a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
|
||||||
a.mapq = mem_approx_mapq_se(opt, ar);
|
if (ar->secondary >= 0) a.flag |= 0x20000;
|
||||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
||||||
w2 = infer_bw(qe - qb, re - rb, ar->score, opt->a, opt->q, opt->r);
|
w2 = infer_bw(qe - qb, re - rb, ar->score, opt->a, opt->q, opt->r);
|
||||||
w2 = w2 < opt->w? w2 : opt->w;
|
w2 = w2 < opt->w? w2 : opt->w;
|
||||||
|
|
@ -839,13 +852,14 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
|
||||||
a.cigar = xrealloc(a.cigar, 4 * (a.n_cigar + 2));
|
a.cigar = xrealloc(a.cigar, 4 * (a.n_cigar + 2));
|
||||||
if (clip5) {
|
if (clip5) {
|
||||||
memmove(a.cigar+1, a.cigar, a.n_cigar * 4);
|
memmove(a.cigar+1, a.cigar, a.n_cigar * 4);
|
||||||
a.cigar[0] = clip5<<4|3;
|
a.cigar[0] = clip5<<4 | (opt->flag&MEM_F_HARDCLIP? 4 : 3);
|
||||||
++a.n_cigar;
|
++a.n_cigar;
|
||||||
}
|
}
|
||||||
if (clip3) a.cigar[a.n_cigar++] = clip3<<4|3;
|
if (clip3) a.cigar[a.n_cigar++] = clip3<<4 | (opt->flag&MEM_F_HARDCLIP? 4 : 3);
|
||||||
}
|
}
|
||||||
a.rid = bns_pos2rid(bns, pos);
|
a.rid = bns_pos2rid(bns, pos);
|
||||||
a.pos = pos - bns->anns[a.rid].offset;
|
a.pos = pos - bns->anns[a.rid].offset;
|
||||||
|
a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub;
|
||||||
free(query);
|
free(query);
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
@ -885,7 +899,7 @@ static void *worker2(void *data)
|
||||||
if (!(w->opt->flag&MEM_F_PE)) {
|
if (!(w->opt->flag&MEM_F_PE)) {
|
||||||
for (i = w->start; i < w->n; i += w->step) {
|
for (i = w->start; i < w->n; i += w->step) {
|
||||||
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
|
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
|
||||||
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
|
mem_reg2sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
|
||||||
free(w->regs[i].a);
|
free(w->regs[i].a);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
14
bwamem.h
14
bwamem.h
|
|
@ -59,19 +59,15 @@ typedef struct {
|
||||||
double avg, std;
|
double avg, std;
|
||||||
} mem_pestat_t;
|
} mem_pestat_t;
|
||||||
|
|
||||||
typedef struct { // TODO: This is an intermediate struct only. Better get rid of it.
|
|
||||||
int64_t rb, re;
|
|
||||||
int qb, qe, flag, qual;
|
|
||||||
// optional info
|
|
||||||
int score, sub;
|
|
||||||
} bwahit_t;
|
|
||||||
|
|
||||||
typedef struct { // This struct is only used for the convenience of API.
|
typedef struct { // This struct is only used for the convenience of API.
|
||||||
int rid; // reference sequence index in bntseq_t
|
int64_t pos; // forward strand 5'-end mapping position
|
||||||
int pos; // forward strand 5'-end mapping position
|
int rid; // reference sequence index in bntseq_t; <0 for unmapped
|
||||||
|
int flag; // extra flag
|
||||||
uint32_t is_rev:1, mapq:8, NM:23; // is_rev: whether on the reverse strand; mapq: mapping quality; NM: edit distance
|
uint32_t is_rev:1, mapq:8, NM:23; // is_rev: whether on the reverse strand; mapq: mapping quality; NM: edit distance
|
||||||
int n_cigar; // number of CIGAR operations
|
int n_cigar; // number of CIGAR operations
|
||||||
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
|
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
|
||||||
|
|
||||||
|
int score, sub;
|
||||||
} mem_aln_t;
|
} mem_aln_t;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,6 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
|
||||||
|
|
||||||
int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2])
|
int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2])
|
||||||
{
|
{
|
||||||
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
|
|
||||||
pair64_v v, u;
|
pair64_v v, u;
|
||||||
int r, i, k, y[4], ret; // y[] keeps the last hit
|
int r, i, k, y[4], ret; // y[] keeps the last hit
|
||||||
kv_init(v); kv_init(u);
|
kv_init(v); kv_init(u);
|
||||||
|
|
@ -230,15 +229,14 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
||||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
||||||
{
|
{
|
||||||
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a);
|
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a);
|
||||||
extern void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const bwahit_t *m);
|
|
||||||
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
||||||
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
|
extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
|
||||||
extern void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, const bwahit_t *p, int is_hard, const bwahit_t *m);
|
extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
|
||||||
|
|
||||||
int n = 0, i, j, z[2], o, subo, n_sub;
|
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
|
||||||
kstring_t str;
|
kstring_t str;
|
||||||
mem_alnreg_v b[2];
|
mem_alnreg_v b[2];
|
||||||
bwahit_t h[2];
|
mem_aln_t h[2];
|
||||||
|
|
||||||
str.l = str.m = 0; str.s = 0;
|
str.l = str.m = 0; str.s = 0;
|
||||||
// perform SW for the best alignment
|
// perform SW for the best alignment
|
||||||
|
|
@ -256,7 +254,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
||||||
if (opt->flag&MEM_F_NOPAIRING) goto no_pairing;
|
if (opt->flag&MEM_F_NOPAIRING) goto no_pairing;
|
||||||
// pairing single-end hits
|
// pairing single-end hits
|
||||||
if (a[0].n && a[1].n && (o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, &n_sub, z)) > 0) {
|
if (a[0].n && a[1].n && (o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, &n_sub, z)) > 0) {
|
||||||
int is_multi[2], q_pe, extra_flag = 1, score_un, q_se[2];
|
int is_multi[2], q_pe, score_un, q_se[2];
|
||||||
// check if an end has multiple hits even after mate-SW
|
// check if an end has multiple hits even after mate-SW
|
||||||
for (i = 0; i < 2; ++i) {
|
for (i = 0; i < 2; ++i) {
|
||||||
for (j = 1; j < a[i].n; ++j)
|
for (j = 1; j < a[i].n; ++j)
|
||||||
|
|
@ -292,27 +290,29 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
||||||
q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]);
|
q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]);
|
||||||
q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]);
|
q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]);
|
||||||
}
|
}
|
||||||
mem_alnreg2hit(&a[0].a[z[0]], &h[0]); h[0].qual = q_se[0]; h[0].flag |= 0x40 | extra_flag;
|
// write SAM
|
||||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)s[0].seq, &h[0].qb, &h[0].qe, &h[0].rb, &h[0].re);
|
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag;
|
||||||
mem_alnreg2hit(&a[1].a[z[1]], &h[1]); h[1].qual = q_se[1]; h[1].flag |= 0x80 | extra_flag;
|
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag;
|
||||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)s[1].seq, &h[1].qb, &h[1].qe, &h[1].rb, &h[1].re);
|
mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = xstrdup(str.s); str.l = 0;
|
||||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[0], &h[0], opt->flag&MEM_F_HARDCLIP, &h[1]); s[0].sam = xstrdup(str.s); str.l = 0;
|
mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s;
|
||||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[1], &h[1], opt->flag&MEM_F_HARDCLIP, &h[0]); s[1].sam = str.s;
|
free(h[0].cigar); free(h[1].cigar);
|
||||||
} else goto no_pairing;
|
} else goto no_pairing;
|
||||||
return n;
|
return n;
|
||||||
|
|
||||||
no_pairing:
|
no_pairing:
|
||||||
for (i = 0; i < 2; ++i) {
|
for (i = 0; i < 2; ++i) {
|
||||||
if (a[i].n && a[i].a[0].score >= opt->T) {
|
if (a[i].n && a[i].a[0].score >= opt->T)
|
||||||
mem_alnreg2hit(&a[i].a[0], &h[i]);
|
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[0]);
|
||||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)s[i].seq, &h[i].qb, &h[i].qe, &h[i].rb, &h[i].re);
|
else h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, 0);
|
||||||
} else {
|
|
||||||
memset(&h[i], 0, sizeof(bwahit_t));
|
|
||||||
h[i].rb = h[i].re = -1;
|
|
||||||
h[i].flag = 1<<(6+i) | 1;
|
|
||||||
}
|
}
|
||||||
|
if (h[0].rid == h[1].rid && h[0].rid >= 0) { // if the top hits from the two ends constitute a proper pair, flag it.
|
||||||
|
int64_t dist;
|
||||||
|
int d;
|
||||||
|
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
|
||||||
|
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2;
|
||||||
}
|
}
|
||||||
mem_sam_se(opt, bns, pac, &s[0], &a[0], 0x41, &h[1]);
|
mem_reg2sam_se(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]);
|
||||||
mem_sam_se(opt, bns, pac, &s[1], &a[1], 0x81, &h[0]);
|
mem_reg2sam_se(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]);
|
||||||
|
free(h[0].cigar); free(h[1].cigar);
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ int main(int argc, char *argv[])
|
||||||
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
||||||
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
||||||
// print alignment
|
// print alignment
|
||||||
err_printf("%s\t%c\t%s\t%d\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, a.pos, a.mapq);
|
err_printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq);
|
||||||
for (k = 0; k < a.n_cigar; ++k) // print CIGAR
|
for (k = 0; k < a.n_cigar; ++k) // print CIGAR
|
||||||
err_printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]);
|
err_printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]);
|
||||||
err_printf("\t%d\n", a.NM); // print edit distance
|
err_printf("\t%d\n", a.NM); // print edit distance
|
||||||
|
|
@ -54,7 +54,7 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
free(opt);
|
free(opt);
|
||||||
kseq_destroy(ks);
|
kseq_destroy(ks);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
bwa_idx_destroy(idx);
|
bwa_idx_destroy(idx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
17
kstring.h
17
kstring.h
|
|
@ -90,6 +90,23 @@ static inline int kputuw(unsigned c, kstring_t *s)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int kputl(long c, kstring_t *s)
|
||||||
|
{
|
||||||
|
char buf[32];
|
||||||
|
long l, x;
|
||||||
|
if (c == 0) return kputc('0', s);
|
||||||
|
for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
|
||||||
|
if (c < 0) buf[l++] = '-';
|
||||||
|
if (s->l + l + 1 >= s->m) {
|
||||||
|
s->m = s->l + l + 2;
|
||||||
|
kroundup32(s->m);
|
||||||
|
s->s = (char*)xrealloc(s->s, s->m);
|
||||||
|
}
|
||||||
|
for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
|
||||||
|
s->s[s->l] = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int ksprintf(kstring_t *s, const char *fmt, ...);
|
int ksprintf(kstring_t *s, const char *fmt, ...);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue