code backup

This commit is contained in:
Heng Li 2013-02-18 16:33:06 -05:00
parent f0a6285aba
commit 66585b7982
4 changed files with 42 additions and 31 deletions

View File

@ -31,6 +31,7 @@ mem_opt_t *mem_opt_init()
mem_opt_t *o;
o = calloc(1, sizeof(mem_opt_t));
o->a = 1; o->b = 5; o->q = 8; o->r = 1; o->w = 100;
o->flag = 0;
o->min_seed_len = 19;
o->split_width = 10;
o->max_occ = 10000;
@ -41,7 +42,6 @@ mem_opt_t *mem_opt_init()
o->chunk_size = 10000000;
o->n_threads = 1;
o->pe_dir = 0<<1|1;
o->is_pe = 0;
mem_fill_scmat(o->a, o->b, o->mat);
return o;
}
@ -598,11 +598,11 @@ void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h)
h->score = a->score;
h->sub = a->sub > a->csub? a->sub : a->csub;
h->qual = 0; // quality unset
h->flag = a->secondary? 0x100 : 0; // only the "secondary" bit is set
h->flag = a->secondary >= 0? 0x100 : 0; // only the "secondary" bit is set
h->mb = h->me = -2; // mate positions are unset
}
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag)
{
int k;
kstring_t str;
@ -612,10 +612,11 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
bwahit_t h;
if (a->a[k].secondary >= 0) continue;
mem_alnreg2hit(&a->a[k], &h);
h.flag |= extra_flag;
h.qual = approx_mapq_se(opt, &a->a[k]);
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->is_hard);
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP);
}
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->is_hard);
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->flag&MEM_F_HARDCLIP);
s->sam = str.s;
}
@ -657,25 +658,25 @@ static void *worker1(void *data)
for (i = w->start; i < w->n; i += w->step) {
w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
w->regs[i].n = mem_sort_and_dedup(w->regs[i].n, w->regs[i].a);
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
}
return 0;
}
static void *worker2(void *data)
{
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2]);
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
worker_t *w = (worker_t*)data;
int i;
if (!w->opt->is_pe) {
if (!(w->opt->flag&MEM_F_PE)) {
for (i = 0; i < w->n; i += w->step) {
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a);
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0);
free(w->regs[i].a);
}
} else {
int n = 0;
for (i = 0; i < w->n>>1; i += w->step) { // not implemented yet
n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, &w->seqs[i<<1], &w->regs[i<<1]);
n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, i, &w->seqs[i<<1], &w->regs[i<<1]);
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
}
fprintf(stderr, "[M::%s@%d] performed mate-SW for %d reads\n", __func__, w->start, n);
@ -702,21 +703,21 @@ int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns
#ifdef HAVE_PTHREAD
if (opt->n_threads == 1) {
worker1(w);
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
worker2(w);
} else {
pthread_t *tid;
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
free(tid);
}
#else
worker1(w);
if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
worker2(w);
#endif
for (i = 0; i < n; ++i) {

View File

@ -15,13 +15,17 @@ typedef struct {
int32_t qbeg, len;
} mem_seed_t;
#define MEM_F_HARDCLIP 0x1
#define MEM_F_PE 0x2
#define MEM_F_NOPAIRING 0x4
typedef struct {
int a, b, q, r, w;
int flag;
int split_width;
int min_seed_len, max_occ, max_chain_gap;
int n_threads, chunk_size;
int pe_dir, is_pe;
int is_hard; // if to use hard clip
int pe_dir;
float mask_level, chain_drop_ratio;
int max_ins; // maximum insert size
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset

View File

@ -159,11 +159,11 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
return n;
}
int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], bwahit_t h[2])
uint64_t mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, uint64_t *sub, int z[2])
{
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
pair64_v v;
pair64_t o, subo; // score<<32 | raw_score<<8 | hash
pair64_t o, subo; // .x: score<<32 | raw_score<<8 | hash; .y: pair
int r, i, k, y[4]; // y[] keeps the last hit
kv_init(v);
for (r = 0; r < 2; ++r) { // loop through read number
@ -198,7 +198,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
ns = (dist - pes[dir].avg) / pes[dir].std;
score = (int)(raw_score - 4.343 / 23. * (opt->a + opt->b) * log(erfc(fabs(ns) * M_SQRT1_2)) + .499);
pair = (uint64_t)k<<32 | i;
x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair)&0xff);
x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair ^ id<<8)&0xff);
if (x > o.x) subo = o, o.x = x, o.y = pair;
else if (x > subo.x) subo.x = x, subo.y = pair;
}
@ -207,19 +207,24 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
}
if (o.x > 0) {
i = o.y >> 32; k = o.y << 32 >> 32;
mem_alnreg2hit(&a[v.a[i].y&1].a[v.a[i].y<<32>>34], &h[v.a[i].y&1]);
mem_alnreg2hit(&a[v.a[k].y&1].a[v.a[k].y<<32>>34], &h[v.a[k].y&1]);
z[v.a[i].y&1] = v.a[i].y<<32>>34;
z[v.a[k].y&1] = v.a[k].y<<32>>34;
}
free(v.a);
return o.x == 0? -1 : 0;
*sub = subo.x;
return o.x;
}
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2])
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
{
int n = 0, i, j;
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a);
extern void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag);
int n = 0, i, j, z[2];
kstring_t str;
bwahit_t h[2];
mem_alnreg_t b[2][2];
uint64_t o, subo;
str.l = str.m = 0; str.s = 0;
// perform SW for the best alignment
for (i = 0; i < 2; ++i)
@ -233,12 +238,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
for (j = 0; j < 2; ++j)
if (b[i][j].score > 0) n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i][j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
// pairing single-end hits
if (mem_pair(opt, bns->l_pac, pac, pes, s, a, h) == 0) { // successful pairing
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[0], &h[0], opt->is_hard);
s[0].sam = strdup(str.s); str.l = 0;
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[1], &h[1], opt->is_hard);
s[1].sam = str.s;
} else {
o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, z);
if (0&&o) { // with proper pairing
} else { // no proper pairing
mem_mark_primary_se(opt, a[0].n, a[0].a); mem_sam_se(opt, bns, pac, &s[0], &a[0], 0x41);
mem_mark_primary_se(opt, a[1].n, a[1].a); mem_sam_se(opt, bns, pac, &s[1], &a[1], 0x81);
}
return n;
}

View File

@ -24,8 +24,10 @@ int main_mem(int argc, char *argv[])
bseq1_t *seqs;
opt = mem_opt_init();
while ((c = getopt(argc, argv, "k:c:v:s:")) >= 0) {
while ((c = getopt(argc, argv, "PHk:c:v:s:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg);
else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
else if (c == 'H') opt->flag |= MEM_F_HARDCLIP;
else if (c == 'c') opt->max_occ = atoi(optarg);
else if (c == 'v') mem_verbose = atoi(optarg);
else if (c == 's') opt->split_width = atoi(optarg);
@ -59,7 +61,7 @@ int main_mem(int argc, char *argv[])
if (optind + 2 < argc) {
fp2 = gzopen(argv[optind + 2], "r");
ks2 = kseq_init(fp2);
opt->is_pe = 1;
opt->flag |= MEM_F_PE;
}
while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {
mem_process_seqs(opt, bwt, bns, pac, n, seqs);