code backup
This commit is contained in:
parent
f0a6285aba
commit
66585b7982
27
bwamem.c
27
bwamem.c
|
|
@ -31,6 +31,7 @@ mem_opt_t *mem_opt_init()
|
||||||
mem_opt_t *o;
|
mem_opt_t *o;
|
||||||
o = calloc(1, sizeof(mem_opt_t));
|
o = calloc(1, sizeof(mem_opt_t));
|
||||||
o->a = 1; o->b = 5; o->q = 8; o->r = 1; o->w = 100;
|
o->a = 1; o->b = 5; o->q = 8; o->r = 1; o->w = 100;
|
||||||
|
o->flag = 0;
|
||||||
o->min_seed_len = 19;
|
o->min_seed_len = 19;
|
||||||
o->split_width = 10;
|
o->split_width = 10;
|
||||||
o->max_occ = 10000;
|
o->max_occ = 10000;
|
||||||
|
|
@ -41,7 +42,6 @@ mem_opt_t *mem_opt_init()
|
||||||
o->chunk_size = 10000000;
|
o->chunk_size = 10000000;
|
||||||
o->n_threads = 1;
|
o->n_threads = 1;
|
||||||
o->pe_dir = 0<<1|1;
|
o->pe_dir = 0<<1|1;
|
||||||
o->is_pe = 0;
|
|
||||||
mem_fill_scmat(o->a, o->b, o->mat);
|
mem_fill_scmat(o->a, o->b, o->mat);
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
@ -598,11 +598,11 @@ void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h)
|
||||||
h->score = a->score;
|
h->score = a->score;
|
||||||
h->sub = a->sub > a->csub? a->sub : a->csub;
|
h->sub = a->sub > a->csub? a->sub : a->csub;
|
||||||
h->qual = 0; // quality unset
|
h->qual = 0; // quality unset
|
||||||
h->flag = a->secondary? 0x100 : 0; // only the "secondary" bit is set
|
h->flag = a->secondary >= 0? 0x100 : 0; // only the "secondary" bit is set
|
||||||
h->mb = h->me = -2; // mate positions are unset
|
h->mb = h->me = -2; // mate positions are unset
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
|
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag)
|
||||||
{
|
{
|
||||||
int k;
|
int k;
|
||||||
kstring_t str;
|
kstring_t str;
|
||||||
|
|
@ -612,10 +612,11 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
|
||||||
bwahit_t h;
|
bwahit_t h;
|
||||||
if (a->a[k].secondary >= 0) continue;
|
if (a->a[k].secondary >= 0) continue;
|
||||||
mem_alnreg2hit(&a->a[k], &h);
|
mem_alnreg2hit(&a->a[k], &h);
|
||||||
|
h.flag |= extra_flag;
|
||||||
h.qual = approx_mapq_se(opt, &a->a[k]);
|
h.qual = approx_mapq_se(opt, &a->a[k]);
|
||||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->is_hard);
|
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP);
|
||||||
}
|
}
|
||||||
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->is_hard);
|
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->flag&MEM_F_HARDCLIP);
|
||||||
s->sam = str.s;
|
s->sam = str.s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -657,25 +658,25 @@ static void *worker1(void *data)
|
||||||
for (i = w->start; i < w->n; i += w->step) {
|
for (i = w->start; i < w->n; i += w->step) {
|
||||||
w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
|
w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
|
||||||
w->regs[i].n = mem_sort_and_dedup(w->regs[i].n, w->regs[i].a);
|
w->regs[i].n = mem_sort_and_dedup(w->regs[i].n, w->regs[i].a);
|
||||||
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *worker2(void *data)
|
static void *worker2(void *data)
|
||||||
{
|
{
|
||||||
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2]);
|
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
|
||||||
worker_t *w = (worker_t*)data;
|
worker_t *w = (worker_t*)data;
|
||||||
int i;
|
int i;
|
||||||
if (!w->opt->is_pe) {
|
if (!(w->opt->flag&MEM_F_PE)) {
|
||||||
for (i = 0; i < w->n; i += w->step) {
|
for (i = 0; i < w->n; i += w->step) {
|
||||||
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
|
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
|
||||||
|
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0);
|
||||||
free(w->regs[i].a);
|
free(w->regs[i].a);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (i = 0; i < w->n>>1; i += w->step) { // not implemented yet
|
for (i = 0; i < w->n>>1; i += w->step) { // not implemented yet
|
||||||
n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, &w->seqs[i<<1], &w->regs[i<<1]);
|
n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, i, &w->seqs[i<<1], &w->regs[i<<1]);
|
||||||
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
|
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "[M::%s@%d] performed mate-SW for %d reads\n", __func__, w->start, n);
|
fprintf(stderr, "[M::%s@%d] performed mate-SW for %d reads\n", __func__, w->start, n);
|
||||||
|
|
@ -702,21 +703,21 @@ int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns
|
||||||
#ifdef HAVE_PTHREAD
|
#ifdef HAVE_PTHREAD
|
||||||
if (opt->n_threads == 1) {
|
if (opt->n_threads == 1) {
|
||||||
worker1(w);
|
worker1(w);
|
||||||
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||||
worker2(w);
|
worker2(w);
|
||||||
} else {
|
} else {
|
||||||
pthread_t *tid;
|
pthread_t *tid;
|
||||||
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
|
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
|
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||||
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
|
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||||
free(tid);
|
free(tid);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
worker1(w);
|
worker1(w);
|
||||||
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||||
worker2(w);
|
worker2(w);
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
|
|
|
||||||
8
bwamem.h
8
bwamem.h
|
|
@ -15,13 +15,17 @@ typedef struct {
|
||||||
int32_t qbeg, len;
|
int32_t qbeg, len;
|
||||||
} mem_seed_t;
|
} mem_seed_t;
|
||||||
|
|
||||||
|
#define MEM_F_HARDCLIP 0x1
|
||||||
|
#define MEM_F_PE 0x2
|
||||||
|
#define MEM_F_NOPAIRING 0x4
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int a, b, q, r, w;
|
int a, b, q, r, w;
|
||||||
|
int flag;
|
||||||
int split_width;
|
int split_width;
|
||||||
int min_seed_len, max_occ, max_chain_gap;
|
int min_seed_len, max_occ, max_chain_gap;
|
||||||
int n_threads, chunk_size;
|
int n_threads, chunk_size;
|
||||||
int pe_dir, is_pe;
|
int pe_dir;
|
||||||
int is_hard; // if to use hard clip
|
|
||||||
float mask_level, chain_drop_ratio;
|
float mask_level, chain_drop_ratio;
|
||||||
int max_ins; // maximum insert size
|
int max_ins; // maximum insert size
|
||||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||||
|
|
|
||||||
|
|
@ -159,11 +159,11 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], bwahit_t h[2])
|
uint64_t mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, uint64_t *sub, int z[2])
|
||||||
{
|
{
|
||||||
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
|
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
|
||||||
pair64_v v;
|
pair64_v v;
|
||||||
pair64_t o, subo; // score<<32 | raw_score<<8 | hash
|
pair64_t o, subo; // .x: score<<32 | raw_score<<8 | hash; .y: pair
|
||||||
int r, i, k, y[4]; // y[] keeps the last hit
|
int r, i, k, y[4]; // y[] keeps the last hit
|
||||||
kv_init(v);
|
kv_init(v);
|
||||||
for (r = 0; r < 2; ++r) { // loop through read number
|
for (r = 0; r < 2; ++r) { // loop through read number
|
||||||
|
|
@ -198,7 +198,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
||||||
ns = (dist - pes[dir].avg) / pes[dir].std;
|
ns = (dist - pes[dir].avg) / pes[dir].std;
|
||||||
score = (int)(raw_score - 4.343 / 23. * (opt->a + opt->b) * log(erfc(fabs(ns) * M_SQRT1_2)) + .499);
|
score = (int)(raw_score - 4.343 / 23. * (opt->a + opt->b) * log(erfc(fabs(ns) * M_SQRT1_2)) + .499);
|
||||||
pair = (uint64_t)k<<32 | i;
|
pair = (uint64_t)k<<32 | i;
|
||||||
x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair)&0xff);
|
x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair ^ id<<8)&0xff);
|
||||||
if (x > o.x) subo = o, o.x = x, o.y = pair;
|
if (x > o.x) subo = o, o.x = x, o.y = pair;
|
||||||
else if (x > subo.x) subo.x = x, subo.y = pair;
|
else if (x > subo.x) subo.x = x, subo.y = pair;
|
||||||
}
|
}
|
||||||
|
|
@ -207,19 +207,24 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
||||||
}
|
}
|
||||||
if (o.x > 0) {
|
if (o.x > 0) {
|
||||||
i = o.y >> 32; k = o.y << 32 >> 32;
|
i = o.y >> 32; k = o.y << 32 >> 32;
|
||||||
mem_alnreg2hit(&a[v.a[i].y&1].a[v.a[i].y<<32>>34], &h[v.a[i].y&1]);
|
z[v.a[i].y&1] = v.a[i].y<<32>>34;
|
||||||
mem_alnreg2hit(&a[v.a[k].y&1].a[v.a[k].y<<32>>34], &h[v.a[k].y&1]);
|
z[v.a[k].y&1] = v.a[k].y<<32>>34;
|
||||||
}
|
}
|
||||||
free(v.a);
|
free(v.a);
|
||||||
return o.x == 0? -1 : 0;
|
*sub = subo.x;
|
||||||
|
return o.x;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2])
|
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
||||||
{
|
{
|
||||||
int n = 0, i, j;
|
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a);
|
||||||
|
extern void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag);
|
||||||
|
int n = 0, i, j, z[2];
|
||||||
kstring_t str;
|
kstring_t str;
|
||||||
bwahit_t h[2];
|
bwahit_t h[2];
|
||||||
mem_alnreg_t b[2][2];
|
mem_alnreg_t b[2][2];
|
||||||
|
uint64_t o, subo;
|
||||||
|
|
||||||
str.l = str.m = 0; str.s = 0;
|
str.l = str.m = 0; str.s = 0;
|
||||||
// perform SW for the best alignment
|
// perform SW for the best alignment
|
||||||
for (i = 0; i < 2; ++i)
|
for (i = 0; i < 2; ++i)
|
||||||
|
|
@ -233,12 +238,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
||||||
for (j = 0; j < 2; ++j)
|
for (j = 0; j < 2; ++j)
|
||||||
if (b[i][j].score > 0) n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i][j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
|
if (b[i][j].score > 0) n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i][j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
|
||||||
// pairing single-end hits
|
// pairing single-end hits
|
||||||
if (mem_pair(opt, bns->l_pac, pac, pes, s, a, h) == 0) { // successful pairing
|
o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, z);
|
||||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[0], &h[0], opt->is_hard);
|
if (0&&o) { // with proper pairing
|
||||||
s[0].sam = strdup(str.s); str.l = 0;
|
} else { // no proper pairing
|
||||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[1], &h[1], opt->is_hard);
|
mem_mark_primary_se(opt, a[0].n, a[0].a); mem_sam_se(opt, bns, pac, &s[0], &a[0], 0x41);
|
||||||
s[1].sam = str.s;
|
mem_mark_primary_se(opt, a[1].n, a[1].a); mem_sam_se(opt, bns, pac, &s[1], &a[1], 0x81);
|
||||||
} else {
|
|
||||||
}
|
}
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,8 +24,10 @@ int main_mem(int argc, char *argv[])
|
||||||
bseq1_t *seqs;
|
bseq1_t *seqs;
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
while ((c = getopt(argc, argv, "k:c:v:s:")) >= 0) {
|
while ((c = getopt(argc, argv, "PHk:c:v:s:")) >= 0) {
|
||||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||||
|
else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
|
||||||
|
else if (c == 'H') opt->flag |= MEM_F_HARDCLIP;
|
||||||
else if (c == 'c') opt->max_occ = atoi(optarg);
|
else if (c == 'c') opt->max_occ = atoi(optarg);
|
||||||
else if (c == 'v') mem_verbose = atoi(optarg);
|
else if (c == 'v') mem_verbose = atoi(optarg);
|
||||||
else if (c == 's') opt->split_width = atoi(optarg);
|
else if (c == 's') opt->split_width = atoi(optarg);
|
||||||
|
|
@ -59,7 +61,7 @@ int main_mem(int argc, char *argv[])
|
||||||
if (optind + 2 < argc) {
|
if (optind + 2 < argc) {
|
||||||
fp2 = gzopen(argv[optind + 2], "r");
|
fp2 = gzopen(argv[optind + 2], "r");
|
||||||
ks2 = kseq_init(fp2);
|
ks2 = kseq_init(fp2);
|
||||||
opt->is_pe = 1;
|
opt->flag |= MEM_F_PE;
|
||||||
}
|
}
|
||||||
while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {
|
while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {
|
||||||
mem_process_seqs(opt, bwt, bns, pac, n, seqs);
|
mem_process_seqs(opt, bwt, bns, pac, n, seqs);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue