r878: XA is given to the best alignment
Non-ALT hits may get ALT hits in the XA tag. This will simplify haplotype assignment.
This commit is contained in:
parent
0a2cf98293
commit
a03d01f944
20
bwamem.c
20
bwamem.c
|
|
@ -512,38 +512,38 @@ static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t *
|
||||||
|
|
||||||
int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id)
|
int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id)
|
||||||
{
|
{
|
||||||
int i, j, n_pri;
|
int i, n_pri;
|
||||||
int_v z = {0,0,0};
|
int_v z = {0,0,0};
|
||||||
if (n == 0) return 0;
|
if (n == 0) return 0;
|
||||||
for (i = n_pri = 0; i < n; ++i) {
|
for (i = n_pri = 0; i < n; ++i) {
|
||||||
a[i].sub = a[i].alt_sc = 0, a[i].secondary = a[i].secondary_alt = -1, a[i].hash = hash_64(id+i);
|
a[i].sub = a[i].alt_sc = 0, a[i].secondary = a[i].secondary_all = -1, a[i].hash = hash_64(id+i);
|
||||||
if (!a[i].is_alt) ++n_pri;
|
if (!a[i].is_alt) ++n_pri;
|
||||||
}
|
}
|
||||||
ks_introsort(mem_ars_hash, n, a);
|
ks_introsort(mem_ars_hash, n, a);
|
||||||
mem_mark_primary_se_core(opt, n, a, &z);
|
mem_mark_primary_se_core(opt, n, a, &z);
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
mem_alnreg_t *p = &a[i];
|
mem_alnreg_t *p = &a[i];
|
||||||
p->secondary_alt = i; // keep the rank in the first round
|
p->secondary_all = i; // keep the rank in the first round
|
||||||
if (!p->is_alt && p->secondary >= 0 && a[p->secondary].is_alt)
|
if (!p->is_alt && p->secondary >= 0 && a[p->secondary].is_alt)
|
||||||
p->alt_sc = a[p->secondary].score;
|
p->alt_sc = a[p->secondary].score;
|
||||||
}
|
}
|
||||||
if (n_pri >= 0 && n_pri < n) {
|
if (n_pri >= 0 && n_pri < n) {
|
||||||
kv_resize(int, z, n);
|
kv_resize(int, z, n);
|
||||||
if (n_pri > 0) ks_introsort(mem_ars_hash2, n, a);
|
if (n_pri > 0) ks_introsort(mem_ars_hash2, n, a);
|
||||||
for (i = 0; i < n; ++i) z.a[a[i].secondary_alt] = i;
|
for (i = 0; i < n; ++i) z.a[a[i].secondary_all] = i;
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
if (a[i].secondary < 0) {
|
if (a[i].secondary >= 0) {
|
||||||
a[i].secondary_alt = -1;
|
a[i].secondary_all = z.a[a[i].secondary];
|
||||||
continue;
|
|
||||||
}
|
|
||||||
j = z.a[a[i].secondary];
|
|
||||||
a[i].secondary_alt = a[j].is_alt? j : -1;
|
|
||||||
if (a[i].is_alt) a[i].secondary = INT_MAX;
|
if (a[i].is_alt) a[i].secondary = INT_MAX;
|
||||||
|
} else a[i].secondary_all = -1;
|
||||||
}
|
}
|
||||||
if (n_pri > 0) { // mark primary for hits to the primary assembly only
|
if (n_pri > 0) { // mark primary for hits to the primary assembly only
|
||||||
for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1;
|
for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1;
|
||||||
mem_mark_primary_se_core(opt, n_pri, a, &z);
|
mem_mark_primary_se_core(opt, n_pri, a, &z);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < n; ++i)
|
||||||
|
a[i].secondary_all = a[i].secondary;
|
||||||
}
|
}
|
||||||
free(z.a);
|
free(z.a);
|
||||||
return n_pri;
|
return n_pri;
|
||||||
|
|
|
||||||
2
bwamem.h
2
bwamem.h
|
|
@ -69,7 +69,7 @@ typedef struct {
|
||||||
int w; // actual band width used in extension
|
int w; // actual band width used in extension
|
||||||
int seedcov; // length of regions coverged by seeds
|
int seedcov; // length of regions coverged by seeds
|
||||||
int secondary; // index of the parent hit shadowing the current hit; <0 if primary
|
int secondary; // index of the parent hit shadowing the current hit; <0 if primary
|
||||||
int secondary_alt;
|
int secondary_all;
|
||||||
int seedlen0; // length of the starting seed
|
int seedlen0; // length of the starting seed
|
||||||
int n_comp:30, is_alt:2; // number of sub-alignments chained together
|
int n_comp:30, is_alt:2; // number of sub-alignments chained together
|
||||||
float frac_rep;
|
float frac_rep;
|
||||||
|
|
|
||||||
|
|
@ -112,34 +112,35 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
|
||||||
s->sam = str.s;
|
s->sam = str.s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void get_pri_idx(double XA_drop_ratio, const mem_alnreg_t *a, int i, int r[2])
|
static inline int get_pri_idx(double XA_drop_ratio, const mem_alnreg_t *a, int i)
|
||||||
{
|
{
|
||||||
int j = a[i].secondary, k = a[i].secondary_alt;
|
int k = a[i].secondary_all;
|
||||||
r[0] = r[1] = -1;
|
if (k >= 0 && a[i].score >= a[k].score * XA_drop_ratio) return k;
|
||||||
if (j >= 0 && j < INT_MAX && !a[j].is_alt && !a[i].is_alt && a[i].score >= a[j].score * XA_drop_ratio) r[0] = j;
|
return -1;
|
||||||
if (k >= 0 && a[k].is_alt && a[i].score >= a[k].score * XA_drop_ratio) r[1] = k;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Okay, returning strings is bad, but this has happened a lot elsewhere. If I have time, I need serious code cleanup.
|
// Okay, returning strings is bad, but this has happened a lot elsewhere. If I have time, I need serious code cleanup.
|
||||||
char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query) // ONLY work after mem_mark_primary_se()
|
char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query) // ONLY work after mem_mark_primary_se()
|
||||||
{
|
{
|
||||||
int i, k, *cnt, tot, r[2];
|
int i, k, r, *cnt, tot;
|
||||||
kstring_t *aln = 0, str = {0,0,0};
|
kstring_t *aln = 0, str = {0,0,0};
|
||||||
char **XA = 0;
|
char **XA = 0, *has_alt;
|
||||||
|
|
||||||
cnt = calloc(a->n, sizeof(int));
|
cnt = calloc(a->n, sizeof(int));
|
||||||
|
has_alt = calloc(a->n, 1);
|
||||||
for (i = 0, tot = 0; i < a->n; ++i) {
|
for (i = 0, tot = 0; i < a->n; ++i) {
|
||||||
get_pri_idx(opt->XA_drop_ratio, a->a, i, r);
|
r = get_pri_idx(opt->XA_drop_ratio, a->a, i);
|
||||||
if (r[0] >= 0) ++cnt[r[0]], ++tot;
|
if (r >= 0) {
|
||||||
if (r[1] >= 0) ++cnt[r[1]], ++tot;
|
++cnt[r], ++tot;
|
||||||
|
if (a->a[i].is_alt) has_alt[r] = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (tot == 0) goto end_gen_alt;
|
if (tot == 0) goto end_gen_alt;
|
||||||
aln = calloc(a->n, sizeof(kstring_t));
|
aln = calloc(a->n, sizeof(kstring_t));
|
||||||
for (i = 0; i < a->n; ++i) {
|
for (i = 0; i < a->n; ++i) {
|
||||||
mem_aln_t t;
|
mem_aln_t t;
|
||||||
get_pri_idx(opt->XA_drop_ratio, a->a, i, r);
|
if ((r = get_pri_idx(opt->XA_drop_ratio, a->a, i)) < 0) continue;
|
||||||
if (r[0] < 0 && r[1] < 0) continue;
|
if (cnt[r] > opt->max_XA_hits_alt || (!has_alt[r] && cnt[r] > opt->max_XA_hits)) continue;
|
||||||
if ((r[0] >= 0 && cnt[r[0]] > opt->max_XA_hits) || (r[1] >= 0 && cnt[r[1]] > opt->max_XA_hits_alt)) continue;
|
|
||||||
t = mem_reg2aln(opt, bns, pac, l_query, query, &a->a[i]);
|
t = mem_reg2aln(opt, bns, pac, l_query, query, &a->a[i]);
|
||||||
str.l = 0;
|
str.l = 0;
|
||||||
kputs(bns->anns[t.rid].name, &str);
|
kputs(bns->anns[t.rid].name, &str);
|
||||||
|
|
@ -152,14 +153,13 @@ char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
||||||
kputc(',', &str); kputw(t.NM, &str);
|
kputc(',', &str); kputw(t.NM, &str);
|
||||||
kputc(';', &str);
|
kputc(';', &str);
|
||||||
free(t.cigar);
|
free(t.cigar);
|
||||||
if (r[0] >= 0 && cnt[r[0]] <= opt->max_XA_hits) kputsn(str.s, str.l, &aln[r[0]]);
|
kputsn(str.s, str.l, &aln[r]);
|
||||||
if (r[1] >= 0 && cnt[r[1]] <= opt->max_XA_hits_alt) kputsn(str.s, str.l, &aln[r[1]]);
|
|
||||||
}
|
}
|
||||||
XA = calloc(a->n, sizeof(char*));
|
XA = calloc(a->n, sizeof(char*));
|
||||||
for (k = 0; k < a->n; ++k)
|
for (k = 0; k < a->n; ++k)
|
||||||
XA[k] = aln[k].s;
|
XA[k] = aln[k].s;
|
||||||
|
|
||||||
end_gen_alt:
|
end_gen_alt:
|
||||||
free(cnt); free(aln); free(str.s);
|
free(has_alt); free(cnt); free(aln); free(str.s);
|
||||||
return XA;
|
return XA;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
2
main.c
2
main.c
|
|
@ -4,7 +4,7 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#ifndef PACKAGE_VERSION
|
#ifndef PACKAGE_VERSION
|
||||||
#define PACKAGE_VERSION "0.7.10-r876-dirty"
|
#define PACKAGE_VERSION "0.7.10-r878-dirty"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int bwa_fa2pac(int argc, char *argv[]);
|
int bwa_fa2pac(int argc, char *argv[]);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue