code backup
This commit is contained in:
parent
f0a6285aba
commit
66585b7982
27
bwamem.c
27
bwamem.c
|
|
@ -31,6 +31,7 @@ mem_opt_t *mem_opt_init()
|
|||
mem_opt_t *o;
|
||||
o = calloc(1, sizeof(mem_opt_t));
|
||||
o->a = 1; o->b = 5; o->q = 8; o->r = 1; o->w = 100;
|
||||
o->flag = 0;
|
||||
o->min_seed_len = 19;
|
||||
o->split_width = 10;
|
||||
o->max_occ = 10000;
|
||||
|
|
@ -41,7 +42,6 @@ mem_opt_t *mem_opt_init()
|
|||
o->chunk_size = 10000000;
|
||||
o->n_threads = 1;
|
||||
o->pe_dir = 0<<1|1;
|
||||
o->is_pe = 0;
|
||||
mem_fill_scmat(o->a, o->b, o->mat);
|
||||
return o;
|
||||
}
|
||||
|
|
@ -598,11 +598,11 @@ void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h)
|
|||
h->score = a->score;
|
||||
h->sub = a->sub > a->csub? a->sub : a->csub;
|
||||
h->qual = 0; // quality unset
|
||||
h->flag = a->secondary? 0x100 : 0; // only the "secondary" bit is set
|
||||
h->flag = a->secondary >= 0? 0x100 : 0; // only the "secondary" bit is set
|
||||
h->mb = h->me = -2; // mate positions are unset
|
||||
}
|
||||
|
||||
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
|
||||
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag)
|
||||
{
|
||||
int k;
|
||||
kstring_t str;
|
||||
|
|
@ -612,10 +612,11 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
|
|||
bwahit_t h;
|
||||
if (a->a[k].secondary >= 0) continue;
|
||||
mem_alnreg2hit(&a->a[k], &h);
|
||||
h.flag |= extra_flag;
|
||||
h.qual = approx_mapq_se(opt, &a->a[k]);
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->is_hard);
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP);
|
||||
}
|
||||
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->is_hard);
|
||||
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->flag&MEM_F_HARDCLIP);
|
||||
s->sam = str.s;
|
||||
}
|
||||
|
||||
|
|
@ -657,25 +658,25 @@ static void *worker1(void *data)
|
|||
for (i = w->start; i < w->n; i += w->step) {
|
||||
w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
|
||||
w->regs[i].n = mem_sort_and_dedup(w->regs[i].n, w->regs[i].a);
|
||||
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *worker2(void *data)
|
||||
{
|
||||
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2]);
|
||||
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
|
||||
worker_t *w = (worker_t*)data;
|
||||
int i;
|
||||
if (!w->opt->is_pe) {
|
||||
if (!(w->opt->flag&MEM_F_PE)) {
|
||||
for (i = 0; i < w->n; i += w->step) {
|
||||
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
|
||||
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
|
||||
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0);
|
||||
free(w->regs[i].a);
|
||||
}
|
||||
} else {
|
||||
int n = 0;
|
||||
for (i = 0; i < w->n>>1; i += w->step) { // not implemented yet
|
||||
n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, &w->seqs[i<<1], &w->regs[i<<1]);
|
||||
n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, i, &w->seqs[i<<1], &w->regs[i<<1]);
|
||||
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
|
||||
}
|
||||
fprintf(stderr, "[M::%s@%d] performed mate-SW for %d reads\n", __func__, w->start, n);
|
||||
|
|
@ -702,21 +703,21 @@ int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns
|
|||
#ifdef HAVE_PTHREAD
|
||||
if (opt->n_threads == 1) {
|
||||
worker1(w);
|
||||
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
worker2(w);
|
||||
} else {
|
||||
pthread_t *tid;
|
||||
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||
free(tid);
|
||||
}
|
||||
#else
|
||||
worker1(w);
|
||||
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
worker2(w);
|
||||
#endif
|
||||
for (i = 0; i < n; ++i) {
|
||||
|
|
|
|||
8
bwamem.h
8
bwamem.h
|
|
@ -15,13 +15,17 @@ typedef struct {
|
|||
int32_t qbeg, len;
|
||||
} mem_seed_t;
|
||||
|
||||
#define MEM_F_HARDCLIP 0x1
|
||||
#define MEM_F_PE 0x2
|
||||
#define MEM_F_NOPAIRING 0x4
|
||||
|
||||
typedef struct {
|
||||
int a, b, q, r, w;
|
||||
int flag;
|
||||
int split_width;
|
||||
int min_seed_len, max_occ, max_chain_gap;
|
||||
int n_threads, chunk_size;
|
||||
int pe_dir, is_pe;
|
||||
int is_hard; // if to use hard clip
|
||||
int pe_dir;
|
||||
float mask_level, chain_drop_ratio;
|
||||
int max_ins; // maximum insert size
|
||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||
|
|
|
|||
|
|
@ -159,11 +159,11 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
|
|||
return n;
|
||||
}
|
||||
|
||||
int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], bwahit_t h[2])
|
||||
uint64_t mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, uint64_t *sub, int z[2])
|
||||
{
|
||||
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
|
||||
pair64_v v;
|
||||
pair64_t o, subo; // score<<32 | raw_score<<8 | hash
|
||||
pair64_t o, subo; // .x: score<<32 | raw_score<<8 | hash; .y: pair
|
||||
int r, i, k, y[4]; // y[] keeps the last hit
|
||||
kv_init(v);
|
||||
for (r = 0; r < 2; ++r) { // loop through read number
|
||||
|
|
@ -198,7 +198,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
|||
ns = (dist - pes[dir].avg) / pes[dir].std;
|
||||
score = (int)(raw_score - 4.343 / 23. * (opt->a + opt->b) * log(erfc(fabs(ns) * M_SQRT1_2)) + .499);
|
||||
pair = (uint64_t)k<<32 | i;
|
||||
x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair)&0xff);
|
||||
x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair ^ id<<8)&0xff);
|
||||
if (x > o.x) subo = o, o.x = x, o.y = pair;
|
||||
else if (x > subo.x) subo.x = x, subo.y = pair;
|
||||
}
|
||||
|
|
@ -207,19 +207,24 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
|||
}
|
||||
if (o.x > 0) {
|
||||
i = o.y >> 32; k = o.y << 32 >> 32;
|
||||
mem_alnreg2hit(&a[v.a[i].y&1].a[v.a[i].y<<32>>34], &h[v.a[i].y&1]);
|
||||
mem_alnreg2hit(&a[v.a[k].y&1].a[v.a[k].y<<32>>34], &h[v.a[k].y&1]);
|
||||
z[v.a[i].y&1] = v.a[i].y<<32>>34;
|
||||
z[v.a[k].y&1] = v.a[k].y<<32>>34;
|
||||
}
|
||||
free(v.a);
|
||||
return o.x == 0? -1 : 0;
|
||||
*sub = subo.x;
|
||||
return o.x;
|
||||
}
|
||||
|
||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2])
|
||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
||||
{
|
||||
int n = 0, i, j;
|
||||
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a);
|
||||
extern void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag);
|
||||
int n = 0, i, j, z[2];
|
||||
kstring_t str;
|
||||
bwahit_t h[2];
|
||||
mem_alnreg_t b[2][2];
|
||||
uint64_t o, subo;
|
||||
|
||||
str.l = str.m = 0; str.s = 0;
|
||||
// perform SW for the best alignment
|
||||
for (i = 0; i < 2; ++i)
|
||||
|
|
@ -233,12 +238,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
for (j = 0; j < 2; ++j)
|
||||
if (b[i][j].score > 0) n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i][j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
|
||||
// pairing single-end hits
|
||||
if (mem_pair(opt, bns->l_pac, pac, pes, s, a, h) == 0) { // successful pairing
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[0], &h[0], opt->is_hard);
|
||||
s[0].sam = strdup(str.s); str.l = 0;
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[1], &h[1], opt->is_hard);
|
||||
s[1].sam = str.s;
|
||||
} else {
|
||||
o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, z);
|
||||
if (0&&o) { // with proper pairing
|
||||
} else { // no proper pairing
|
||||
mem_mark_primary_se(opt, a[0].n, a[0].a); mem_sam_se(opt, bns, pac, &s[0], &a[0], 0x41);
|
||||
mem_mark_primary_se(opt, a[1].n, a[1].a); mem_sam_se(opt, bns, pac, &s[1], &a[1], 0x81);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,8 +24,10 @@ int main_mem(int argc, char *argv[])
|
|||
bseq1_t *seqs;
|
||||
|
||||
opt = mem_opt_init();
|
||||
while ((c = getopt(argc, argv, "k:c:v:s:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "PHk:c:v:s:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||
else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
|
||||
else if (c == 'H') opt->flag |= MEM_F_HARDCLIP;
|
||||
else if (c == 'c') opt->max_occ = atoi(optarg);
|
||||
else if (c == 'v') mem_verbose = atoi(optarg);
|
||||
else if (c == 's') opt->split_width = atoi(optarg);
|
||||
|
|
@ -59,7 +61,7 @@ int main_mem(int argc, char *argv[])
|
|||
if (optind + 2 < argc) {
|
||||
fp2 = gzopen(argv[optind + 2], "r");
|
||||
ks2 = kseq_init(fp2);
|
||||
opt->is_pe = 1;
|
||||
opt->flag |= MEM_F_PE;
|
||||
}
|
||||
while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {
|
||||
mem_process_seqs(opt, bwt, bns, pac, n, seqs);
|
||||
|
|
|
|||
Loading…
Reference in New Issue