diff --git a/.vscode/launch.json b/.vscode/launch.json index d1ecb76..0a30413 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -21,7 +21,8 @@ "~/data/fastq/dataset/na12878_wes_144/1w_1.fq", "~/data/fastq/dataset/na12878_wes_144/1w_2.fq", "-o", - "/dev/null" + "/dev/null", + "-Z" ], "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 }, @@ -58,6 +59,19 @@ "program": "${workspaceRoot}/fastbwa", "args": [ "shm", + "-Z", + "~/data1/fmt_ref/human_g1k_v37_decoy.fasta" + ], + "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 + }, + { + "name": "pac to bref", + "preLaunchTask": "Build", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceRoot}/fastbwa", + "args": [ + "pac2bref", "~/data1/fmt_ref/human_g1k_v37_decoy.fasta" ], "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 diff --git a/.vscode/settings.json b/.vscode/settings.json index ba57521..d9cb6da 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -40,6 +40,8 @@ "bntseq.h": "c", "inttypes.h": "c", "ertindex.h": "c", - "ertseeding.h": "c" + "ertseeding.h": "c", + "algorithm": "c", + "filesystem": "c" } } \ No newline at end of file diff --git a/Makefile b/Makefile index f5af91e..9506ced 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC= gcc -CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2 +CFLAGS= -g -Wall -Wno-unused-function -mavx2 #-O2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS SHOW_PERF= -DSHOW_PERF diff --git a/bwa.c b/bwa.c index 1d6bb1c..8827e57 100644 --- a/bwa.c +++ b/bwa.c @@ -43,7 +43,7 @@ # include "malloc_wrap.h" #endif -int bwa_verbose = 4; +int bwa_verbose = 3; int bwa_dbg = 0; char bwa_rg_id[256]; char *bwa_pg; @@ -514,9 +514,11 @@ bwaidx_t *bwa_ertidx_load_from_disk(const char *hint) { char kmer_tbl_file_name[PATH_MAX]; char ml_tbl_file_name[PATH_MAX]; + char bref_file_name[PATH_MAX]; sprintf(kmer_tbl_file_name, "%s.ert.kmer.table", prefix); sprintf(ml_tbl_file_name, "%s.ert.mlt.table", prefix); - FILE *kmer_tbl_fd, *ml_tbl_fd; + sprintf(bref_file_name, "%s.ert.0123", prefix); + FILE *kmer_tbl_fd, *ml_tbl_fd, *bref_fd; kmer_tbl_fd = fopen(kmer_tbl_file_name, "rb"); if (kmer_tbl_fd == NULL) { fprintf(stderr, "[M::%s::ERT] Can't open k-mer index\n.", __func__); @@ -558,10 +560,27 @@ bwaidx_t *bwa_ertidx_load_from_disk(const char *hint) fprintf(stderr, "[M::%s::ERT] Reading multi-level tree index to memory\n", __func__); } err_fread_noeof(idx->ert->mlt_table, sizeof(uint8_t), size, ml_tbl_fd); + + // Read binary ref + bref_fd = fopen(bref_file_name, "rb"); + if (bref_fd == NULL) { + fprintf(stderr, "[M::%s::ERT] Can't open binary reference file\n.", __func__); + exit(1); + } + fseek(bref_fd, 0L, SEEK_END); + idx->ert->bref_size = ftell(bref_fd); + if (bwa_verbose >= 3) { + fprintf(stderr, "[M::%s::ERT] Reading binary reference to memory\n", __func__); + } + allocMem += idx->ert->bref_size; + idx->ert->bref = (uint8_t *)malloc(idx->ert->bref_size * sizeof(uint8_t)); + fseek(bref_fd, 0L, SEEK_SET); + err_fread_noeof(idx->ert->bref, sizeof(uint8_t), idx->ert->bref_size, bref_fd); + fclose(kmer_tbl_fd); fclose(ml_tbl_fd); - if (bwa_verbose >= 3) - { + fclose(bref_fd); + if (bwa_verbose >= 3) { fprintf(stderr, "[M::%s::ERT] Index tables loaded in %.3f CPU sec, %.3f real sec...\n", __func__, cputime() - ctime, realtime() - rtime); } } @@ -713,6 +732,7 @@ int bwa_mem2ertidx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx) x = sizeof(ERT); idx->ert = malloc(x); memcpy(idx->ert, mem + k, x); k += x; x = idx->ert->mlt_size; idx->ert->mlt_table = (uint8_t *)(mem + k); k += x; x = idx->ert->kmer_size; idx->ert->kmer_offsets = (uint64_t *)(mem + k); k += x; +// x = idx->ert->bref_size; idx->ert->bref = (uint8_t *)(mem + k); k += x; // generate idx->bns and idx->pac mem_to_bnspac(idx, &mem, &k); @@ -774,9 +794,14 @@ int bwa_ertidx2mem(bwaidx_t *idx) x = idx->ert->kmer_size; mem = realloc(mem, k + x); memcpy(mem + k, idx->ert->kmer_offsets, x); k += x; free(idx->ert->kmer_offsets); idx->ert->kmer_offsets = 0; + + x = idx->ert->bref_size; + mem = realloc(mem, k + x); memcpy(mem + k, idx->ert->bref, x); k += x; + free(idx->ert->bref); idx->ert->bref = 0; free(idx->ert); idx->ert = 0; + // copy idx->bns move_bns_to_mem(idx, &mem, &k); // copy idx->pac diff --git a/bwa.h b/bwa.h index c8012fc..583423b 100644 --- a/bwa.h +++ b/bwa.h @@ -51,8 +51,10 @@ typedef struct { uint64_t *kmer_offsets; // ert kmer uint8_t *mlt_table; // ert mlt + uint8_t *bref; // binary ref uint64_t kmer_size; uint64_t mlt_size; + uint64_t bref_size; } ERT; typedef struct { diff --git a/bwamem.c b/bwamem.c index 29237b6..1977fd4 100644 --- a/bwamem.c +++ b/bwamem.c @@ -45,6 +45,7 @@ #include "fmt_idx.h" #include "profiling.h" #include "debug.h" +#include "ertseeding.h" #ifdef USE_MALLOC_WRAPPERS # include "malloc_wrap.h" @@ -75,6 +76,9 @@ static const bntseq_t *global_bns = 0; // for debugging only +#define smem_lt_2(a, b) ((a).start == (b).start ? (a).end < (b).end : (a).start < (b).start) +KSORT_INIT(mem_smem_sort_lt, mem_t, smem_lt_2) + mem_opt_t *mem_opt_init() { mem_opt_t *o; @@ -1264,11 +1268,11 @@ static void smem_aux_destroy(smem_aux_t *a) } // 初始化线程需要的数据 -mem_worker_t *init_mem_worker(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac) +mem_worker_t *init_mem_worker(const mem_opt_t *opt, const FMTIndex *fmt, const ERT *ert, const bntseq_t *bns, const uint8_t *pac, int useERT) { int i = opt->n_threads, j; mem_worker_t *w = calloc(1, sizeof(mem_worker_t)); - w->opt = opt; w->bns = bns; w->pac = pac; w->fmt = fmt; + w->opt = opt; w->bns = bns; w->pac = pac; w->fmt = fmt; w->ert = ert; w->useERT = useERT; w->calc_isize = 0; w->n = 0; w->regs = 0; w->aux = malloc(i * sizeof(smem_aux_t)); w->smem_arr = malloc(i * sizeof(smem_v *)); @@ -1509,7 +1513,6 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t bseq1_t *seq_arr, int nseq, smem_aux_t *aux, smem_v *smem_arr, mem_chain_v *chain_arr, mem_alnreg_v *reg_arr, int calc_isize, int64_t l_pac, uint64_v *isize, int tid) { - int i, j, l_seq; mem_chain_v *chnp; mem_alnreg_v *regp; @@ -1602,13 +1605,270 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t } #endif } + +void ert_generate_chain(const mem_opt_t *opt, const bntseq_t *bns, int len, const uint8_t *seq, + mem_v *smems, mem_chain_v *chain, u64v *hits, int tid) +{ + int i, b = 0, e = 0, l_rep = 0; + int64_t l_pac = bns->l_pac; + kbtree_t(chn) * tree; + if (len < opt->min_seed_len) return; // if the query is shorter than the seed length, no match + tree = kb_init(chn, KB_DEFAULT_SIZE + 8); // +8, due to addition of counters in chain + for (i = 0, b = e = l_rep = 0; i < smems->n; ++i) { // compute frac_rep + mem_t *p = &smems->a[i]; + int sb = p->start, se = p->end; + if (p->hitcount <= opt->max_occ) continue; + if (sb > e) l_rep += e - b, b = sb, e = se; + else e = e > se? e : se; + } + l_rep += e - b; + + for (i = 0; i < smems->n; ++i) { + mem_t *p = &smems->a[i]; + int step, count, slen = p->end - p->start; // seed length + int64_t k; + step = p->hitcount > opt->max_occ ? p->hitcount / opt->max_occ : 1; + for (k = count = 0; k < p->hitcount && count < opt->max_occ; k += step, ++count) { + mem_chain_t tmp, *lower, *upper; + mem_seed_t s; + int rid, to_add = 0; + if (p->forward || p->fetch_leaves) { + s.rbeg = tmp.pos = hits->a[p->hitbeg + k]; // this is the base coordinate in the forward-reverse reference + } + else { + // Hit was obtained by backward search and corresponds to location of reverse complemented SMEM + // Add correction to hit position to get locations of SMEM. + s.rbeg = tmp.pos = (bns->l_pac << 1) - (hits->a[p->hitbeg + k] + slen - p->end_correction); + } + s.qbeg = p->start; + s.len = p->end - p->start; + s.score = s.len; + rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len); + if (rid < 0) continue; // bridging multiple reference sequences or the forward-reverse boundary; TODO: split the seed; don't discard it!!! + if (kb_size(tree)) { + kb_intervalp(chn, tree, &tmp, &lower, &upper); // find the closest chain + if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid)) to_add = 1; + } + else to_add = 1; + if (to_add) { // add the seed as a new chain + tmp.n = 1; + tmp.m = 4; + tmp.seeds = calloc(tmp.m, sizeof(mem_seed_t)); + tmp.seeds[0] = s; + tmp.rid = rid; + tmp.is_alt = !!bns->anns[rid].is_alt; + kb_putp(chn, tree, &tmp); + } + } + } + if (chain->m < kb_size(tree)) { + kv_resize(mem_chain_t, *chain, kb_size(tree)); + } + +#define traverse_func(p_) (chain->a[chain->n++] = *(p_)) + __kb_traverse(mem_chain_t, tree, traverse_func); +#undef traverse_func + + for (i = 0; i < chain->n; ++i) chain->a[i].frac_rep = (float)l_rep / len; + if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len); + kb_destroy(chn, tree); +} + +void ert_smem_chain(const mem_opt_t *opt, const ERT *ert, const bntseq_t *bns, const uint8_t *pac, + int len, const uint8_t *seq, smem_aux_t *aux, mem_chain_v *chn, int tid) +{ + if (len < opt->min_seed_len) return; // if the query is shorter than the seed length, no match + if (len > ERT_MAX_READ_LEN) + { + fprintf(stderr, "Your dataset has reads with length %d. But the ERT index built was for reads with length <= %d. " + "Please set ERT_MAX_READ_LEN to the maximum read length in your dataset and rebuild the ERT index. " + "Default ERT index supports mapping of reads with length <= %d bp\n", len, ERT_MAX_READ_LEN, ERT_MAX_READ_LEN); + exit(EXIT_FAILURE); + } + mem_v *smems = calloc(1, sizeof(mem_v)); kv_init(*smems); + u64v *hits = calloc(1, sizeof(u64v)); kv_init(*hits); + // int64_t seedBufSize = 256; + // mem_seed_t *seedBuf = malloc(sizeof(mem_seed_t) * seedBufSize); + // int64_t seedBufCount = 0; + + int hasN = 0; + int i; + uint8_t unpacked_rc_queue_buf[ERT_MAX_READ_LEN]; + + + for (i = 0; i < len; ++i) { + if (seq[i] < 4) { + unpacked_rc_queue_buf[len - i - 1] = 3 - seq[i]; + } else { + hasN = 1; + unpacked_rc_queue_buf[len - i - 1] = 4; + } + } + + int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); + index_aux_t iaux; + iaux.kmer_offsets = ert->kmer_offsets; + iaux.mlt_table = ert->mlt_table; + iaux.bns = bns; + iaux.pac = pac; + iaux.ref_string = ert->bref; + + read_aux_t raux; + raux.min_seed_len = opt->min_seed_len; + raux.l_seq = len; + raux.unpacked_queue_buf = (uint8_t *)seq; + raux.unpacked_rc_queue_buf = unpacked_rc_queue_buf; + + hits->n = 0; + + // seed-1 + PROF_START(seed_1); + if (hasN) { + get_seeds(&iaux, &raux, smems, hits); + } + else { + get_seeds_prefix(&iaux, &raux, smems, hits); + } + PROF_END(tprof[T_SEED_1][tid], seed_1); + // fprintf(stderr, "hits: %ld\n", hits->n); + + // seed-2 + PROF_START(seed_2); + int old_n = smems->n; + for (i = 0; i < old_n; ++i) { + int qbeg = smems->a[i].start; + int qend = smems->a[i].end; + if ((qend - qbeg) < split_len || smems->a[i].hitcount > opt->split_width) { + continue; + } + if (hasN) { + reseed(&iaux, &raux, smems, + (qbeg + qend) >> 1, smems->a[i].hitcount + 1, + &smems->a[i].pt, hits); + } + else { + reseed_prefix(&iaux, &raux, smems, + (qbeg + qend) >> 1, smems->a[i].hitcount + 1, + &smems->a[i].pt, hits); + } + } + PROF_END(tprof[T_SEED_2][tid], seed_2); + + // seed-3 + PROF_START(seed_3); + last(&iaux, &raux, smems, opt->max_mem_intv, hits); + PROF_END(tprof[T_SEED_3][tid], seed_3); + + // sort + ks_introsort(mem_smem_sort_lt, smems->n, smems->a); + + // chain + PROF_START(chain_all); + chn->n = 0; + PROF_START(gen_chain); + ert_generate_chain(opt, bns, len, seq, + smems, chn, hits, tid); + PROF_END(tprof[T_GEN_CHAIN][tid], gen_chain); + PROF_START(flt_chain); + chn->n = mem_chain_flt(opt, chn->n, chn->a); + PROF_END(tprof[T_FLT_CHAIN][tid], flt_chain); + PROF_START(flt_chained_seeds); + mem_flt_chained_seeds(opt, bns, pac, len, (uint8_t *)seq, chn->n, chn->a, aux->seq_buf); + PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds); + PROF_END(tprof[T_CHAIN_ALL][tid], chain_all); + kv_destroy(*smems); free(smems); + kv_destroy(*hits); free(hits); +} + +// 使用ERT作为seeding阶段索引 +void ert_mem_core_process(const mem_opt_t *opt, const ERT *ert, const bntseq_t *bns, const uint8_t *pac, + bseq1_t *seq_arr, int nseq, smem_aux_t *aux, smem_v *smem_arr, mem_chain_v *chain_arr, mem_alnreg_v *reg_arr, + int calc_isize, int64_t l_pac, uint64_v *isize, int tid) +{ + int i, j, l_seq; + mem_chain_v *chnp; + mem_alnreg_v *regp; + char *seq; + + // 1. seeding and 2. chain + PROF_START(seed_all); + for (i = 0; i < nseq; ++i) { + seq = seq_arr[i].seq; + l_seq = seq_arr[i].l_seq; + for (j = 0; j < l_seq; ++j) { + seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]]; + } + ert_smem_chain(opt, ert, bns, pac, l_seq, (uint8_t *)seq, aux, &chain_arr[i], tid); + } + PROF_END(tprof[T_SEED_ALL][tid], seed_all); + + // 3. align + PROF_START(aln_all); + for (i=0; in; ++j) { + mem_chain_t *p = &chnp->a[j]; + if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", j); + mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux, tid); + free(chnp->a[j].seeds); + } + + free(chnp->a); chnp->m = 0; chnp->a = 0; + regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq, regp->n, regp->a); + if (bwa_verbose >= 4) { + err_printf("* %ld chains remain after removing duplicated chains\n", regp->n); + for (j = 0; j < regp->n; ++j) { + mem_alnreg_t *p = ®p->a[j]; + printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re); + } + } + for (j = 0; j < regp->n; ++j) { + mem_alnreg_t *p = ®p->a[j]; + if (p->rid >= 0 && bns->anns[p->rid].is_alt) + p->is_alt = 1; + } + } + PROF_END(tprof[T_ALN_ALL][tid], aln_all); + +#if 1 + // 4. calc insert size + +#define MIN_RATIO 0.8 + if (calc_isize) { + PROF_START(ins_size); + for (i = 0; i < nseq>>1; ++i) { + int dir; + int64_t is; + mem_alnreg_v *r[2]; + r[0] = (mem_alnreg_v*)®_arr[i<<1|0]; + r[1] = (mem_alnreg_v*)®_arr[i<<1|1]; + if (r[0]->n == 0 || r[1]->n == 0) continue; + if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue; + if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue; + if (r[0]->a[0].rid != r[1]->a[0].rid) continue; // not on the same chr + dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is); + if (is && is <= opt->max_ins) kv_push(uint64_t, isize[dir], is); + } + PROF_END(tprof[T_INS_SIZE][tid], ins_size); + } +#endif +} + // 找smem,生成chain,然后align static void worker_smem_align(void *data, int i, int tid) { mem_worker_t *w = (mem_worker_t *)data; int start = i * w->opt->batch_size; int end = MIN(start + w->opt->batch_size, w->n_reads); - mem_core_process(w->opt, w->fmt, w->bns, w->pac, w->seqs + start, end - start, w->aux[tid], w->smem_arr[tid], w->chain_arr[tid], w->regs + start, w->calc_isize, w->bns->l_pac, w->isize_arr[tid], tid); + if (w->useERT) + ert_mem_core_process(w->opt, w->ert, w->bns, w->pac, w->seqs + start, end - start, w->aux[tid], w->smem_arr[tid], w->chain_arr[tid], w->regs + start, w->calc_isize, w->bns->l_pac, w->isize_arr[tid], tid); + else + mem_core_process(w->opt, w->fmt, w->bns, w->pac, w->seqs + start, end - start, w->aux[tid], w->smem_arr[tid], w->chain_arr[tid], w->regs + start, w->calc_isize, w->bns->l_pac, w->isize_arr[tid], tid); } void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, seq_sam_t *sams) @@ -1631,7 +1891,7 @@ void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed w->n_reads = n; w->pes = &pes[0]; #if 1 - if ((opt->flag & MEM_F_PE) && !pes0) { // infer insert sizes if not provided + if ((opt->flag & MEM_F_PE) && !pes0) { // infer insert sizes if not provided int i, j; w->calc_isize = 1; for (i = 0; i < opt->n_threads; ++i) diff --git a/bwamem.h b/bwamem.h index 3f7a2c8..e7fe380 100644 --- a/bwamem.h +++ b/bwamem.h @@ -157,6 +157,7 @@ typedef struct { const mem_opt_t *opt; const bwt_t *bwt; const FMTIndex *fmt; + const ERT *ert; const bntseq_t *bns; const uint8_t *pac; const mem_pestat_t *pes; @@ -170,12 +171,13 @@ typedef struct { int64_t n_processed; int64_t n; int64_t n_reads; + int useERT; } mem_worker_t; #ifdef __cplusplus extern "C" { #endif - mem_worker_t *init_mem_worker(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac); + mem_worker_t *init_mem_worker(const mem_opt_t *opt, const FMTIndex *fmt, const ERT *ert, const bntseq_t *bns, const uint8_t *pac, int useERT); smem_i *smem_itr_init(const bwt_t *bwt); void smem_itr_destroy(smem_i *itr); diff --git a/bwashm.c b/bwashm.c index 99c84b8..53e2be4 100644 --- a/bwashm.c +++ b/bwashm.c @@ -8,15 +8,16 @@ #include #include #include "bwa.h" +#include "ertindex.h" -int bwa_shm_stage(bwaidx_t *idx, const char *hint, int useERT, const char *_tmpfn) +int bwa_shm_stage(bwaidx_t *idx, const char *hint, int useERT) { const char *name; uint8_t *shm, *shm_idx; uint16_t *cnt; int shmid, to_init = 0, l; - char path[PATH_MAX + 1], *tmpfn = (char*)_tmpfn; + char path[PATH_MAX + 1], *tmpfn = 0; if (hint == 0 || hint[0] == 0) return -1; for (name = hint + strlen(hint) - 1; name >= hint && *name != '/'; --name); @@ -121,9 +122,16 @@ int bwa_shm_stage(bwaidx_t *idx, const char *hint, int useERT, const char *_tmpf bwaidx_t *bwa_ertidx_load_from_shm(const char *hint_) { char hint[PATH_MAX]; + char fn[PATH_MAX]; + FILE *fp; sprintf(hint, "%s.ert", hint_); INIT_SHM_LOAD; bwa_mem2ertidx(l_mem, shm_idx, idx); + idx->ert->bref = (uint8_t *)malloc(idx->ert->bref_size * sizeof(uint8_t)); + sprintf(fn, "%s.ert.0123", hint_); + fp = xopen(fn, "rb"); + err_fread_noeof(idx->ert->bref, sizeof(uint8_t), idx->ert->bref_size, fp); + err_fclose(fp); idx->is_shm = 1; return idx; } @@ -228,23 +236,283 @@ int bwa_shm_destroy(void) return 0; } +int bwa_shm_stage_ert(const char *idx_prefix) +{ + const char *name; + uint8_t *shm, *mem; + uint16_t *cnt; + int shmid, to_init = 0, l; + char path[PATH_MAX], ert_prefix[PATH_MAX]; + int64_t l_mem = 0, x = 0, k = 0; + char fn[PATH_MAX]; + FILE *fp; + ERT ert; + bntseq_t *bns; + int i; + + // clac l_mem + // ert + x = sizeof(ERT); l_mem += x; + sprintf(fn, "%s.ert.mlt.table", idx_prefix); + fp = xopen(fn, "rb"); + err_fseek(fp, 0L, SEEK_END); + ert.mlt_size = ftell(fp); + err_fclose(fp); + // multi-level tree + x = ert.mlt_size; l_mem += x; + // kmer entry + ert.kmer_size = numKmers * sizeof(uint64_t); + x = ert.kmer_size; l_mem += x; + // binary ref + sprintf(fn, "%s.ert.0123", idx_prefix); + fp = xopen(fn, "rb"); + fseek(fp, 0L, SEEK_END); + ert.bref_size = ftell(fp); +// x = ert.bref_size; l_mem += x; + err_fclose(fp); + // bns + x = sizeof(bntseq_t); l_mem += x; + bns = bns_restore(idx_prefix); + x = bns->n_holes * sizeof(bntamb1_t); l_mem += x; + x = bns->n_seqs * sizeof(bntann1_t); l_mem += x; + for (i = 0; i < bns->n_seqs; ++i) { + x = strlen(bns->anns[i].name) + 1; l_mem += x; + x = strlen(bns->anns[i].anno) + 1; l_mem += x; + } + // pac + x = bns->l_pac / 4 + 1; l_mem += x; + + fprintf(stderr, "l_mem: %ld\n", l_mem); + + sprintf(ert_prefix, "%s.ert", idx_prefix); + for (name = ert_prefix + strlen(ert_prefix) - 1; name >= ert_prefix && *name != '/'; --name); + ++name; + if ((shmid = shm_open("/bwactl", O_RDWR, 0)) < 0) { + shmid = shm_open("/bwactl", O_CREAT|O_RDWR|O_EXCL, 0644); + to_init = 1; + } + if (shmid < 0) return -1; + if (ftruncate(shmid, BWA_CTL_SIZE) < 0) return -1; + shm = mmap(0, BWA_CTL_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, shmid, 0); + cnt = (uint16_t*)shm; + if (to_init) { + memset(shm, 0, BWA_CTL_SIZE); + cnt[1] = 4; + } + strcat(strcpy(path, "/bwaidx-"), name); + if ((shmid = shm_open(path, O_CREAT|O_RDWR|O_EXCL, 0644)) < 0) { + shm_unlink(path); + perror("shm_open()"); + return -1; + } + l = 8 + strlen(name) + 1; + if (cnt[1] + l > BWA_CTL_SIZE) return -1; + memcpy(shm + cnt[1], &l_mem, 8); + memcpy(shm + cnt[1] + 8, name, l - 8); + cnt[1] += l; ++cnt[0]; + if (ftruncate(shmid, l_mem) < 0) return -1; + mem = mmap(0, l_mem, PROT_READ|PROT_WRITE, MAP_SHARED, shmid, 0); + + // write to share mem + // ert + x = sizeof(ERT); + memcpy(mem, &ert, x); k = x; + // multi-level tree + sprintf(fn, "%s.ert.mlt.table", idx_prefix); + fp = xopen(fn, "rb"); + x = ert.mlt_size; + fread_fix(fp, x, mem + k); k += x; + err_fclose(fp); + // kmer entry + sprintf(fn, "%s.ert.kmer.table", idx_prefix); + fp = xopen(fn, "rb"); + x = ert.kmer_size; + fread_fix(fp, x, mem + k); k += x; + err_fclose(fp); + // binary ref +// sprintf(fn, "%s.ert.0123", idx_prefix); +// fp = xopen(fn, "rb"); +// x = ert.bref_size; +// fread_fix(fp, x, mem + k); k += x; +// err_fclose(fp); + // bns + x = sizeof(bntseq_t); memcpy(mem + k, bns, x); k += x; + x = bns->n_holes * sizeof(bntamb1_t); memcpy(mem + k, bns->ambs, x); k += x; + x = bns->n_seqs * sizeof(bntann1_t); memcpy(mem + k, bns->anns, x); k += x; + for (i = 0; i < bns->n_seqs; ++i) { + x = strlen(bns->anns[i].name) + 1; memcpy(mem + k, bns->anns[i].name, x); k += x; + x = strlen(bns->anns[i].anno) + 1; memcpy(mem + k, bns->anns[i].anno, x); k += x; + } + // pac + x = bns->l_pac / 4 + 1; + err_fread_noeof(mem + k, 1, x, bns->fp_pac); k += x;// concatenated 2-bit encoded sequence + err_fclose(bns->fp_pac); + + fprintf(stderr, "k: %ld\n", k); + return 0; +} + +int bwa_shm_stage_fmt(const char *idx_prefix) +{ + const char *name; + uint8_t *shm, *mem; + uint16_t *cnt; + int shmid, to_init = 0, l; + char path[PATH_MAX]; + int64_t l_mem = 0, x = 0, k = 0; + char fn[PATH_MAX]; + FILE *fp; + FMTIndex fmt; + bntseq_t *bns; + int i; + // clac l_mem + // fmt + x = sizeof(FMTIndex); l_mem += x; + sprintf(fn, "%s.fmt", idx_prefix); + fp = xopen(fn, "rb"); + err_fseek(fp, 0, SEEK_END); + x = ftell(fp) - sizeof(bwtint_t) * 6 - 3; l_mem += x; + fmt.bwt_size = x >> 2; + fseek(fp, 0, SEEK_SET); + err_fread_noeof(&fmt.primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(&fmt.sec_primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(&fmt.sec_bcp, sizeof(uint8_t), 1, fp); + err_fread_noeof(&fmt.first_base, sizeof(uint8_t), 1, fp); + err_fread_noeof(&fmt.last_base, sizeof(uint8_t), 1, fp); + err_fread_noeof(fmt.L2 + 1, sizeof(bwtint_t), 4, fp); + fmt.seq_len = fmt.L2[4]; + fmt_gen_cnt_occ(&fmt); + err_fclose(fp); + // sa + sprintf(fn, "%s.bytesa", idx_prefix); + fp = xopen(fn, "rb"); + err_fseek(fp, sizeof(bwtint_t) * 5, SEEK_SET); + err_fread_noeof(&fmt.sa_intv, sizeof(bwtint_t), 1, fp); + fmt.n_sa = (fmt.seq_len + fmt.sa_intv) / fmt.sa_intv; + x = SA_BYTES(fmt.n_sa); l_mem += x; + err_fclose(fp); + // kmaer hash + x = (1 << (10 << 1)) * sizeof(KmerEntryArr) + + (1 << (11 << 1)) * sizeof(KmerEntry) + + (1 << (12 << 1)) * sizeof(KmerEntry); +#if HASH_KMER_LEN > 12 + x += (1 << (13 << 1)) * sizeof(KmerEntry); +#endif +#if HASH_KMER_LEN > 13 + x += (1 << (14 << 1)) * sizeof(KmerEntry); +#endif + l_mem += x; + // bns + x = sizeof(bntseq_t); l_mem += x; + bns = bns_restore(idx_prefix); + x = bns->n_holes * sizeof(bntamb1_t); l_mem += x; + x = bns->n_seqs * sizeof(bntann1_t); l_mem += x; + for (i = 0; i < bns->n_seqs; ++i) { + x = strlen(bns->anns[i].name) + 1; l_mem += x; + x = strlen(bns->anns[i].anno) + 1; l_mem += x; + } + // pac + x = bns->l_pac / 4 + 1; l_mem += x; + fmt.l_pac = bns->l_pac; + + fprintf(stderr, "l_mem: %ld\n", l_mem); + + for (name = idx_prefix + strlen(idx_prefix) - 1; name >= idx_prefix && *name != '/'; --name) ; + ++name; + if ((shmid = shm_open("/bwactl", O_RDWR, 0)) < 0) { + shmid = shm_open("/bwactl", O_CREAT|O_RDWR|O_EXCL, 0644); + to_init = 1; + } + if (shmid < 0) return -1; + if (ftruncate(shmid, BWA_CTL_SIZE) < 0) return -1; + shm = mmap(0, BWA_CTL_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, shmid, 0); + cnt = (uint16_t*)shm; + if (to_init) { + memset(shm, 0, BWA_CTL_SIZE); + cnt[1] = 4; + } + strcat(strcpy(path, "/bwaidx-"), name); + if ((shmid = shm_open(path, O_CREAT|O_RDWR|O_EXCL, 0644)) < 0) { + shm_unlink(path); + perror("shm_open()"); + return -1; + } + l = 8 + strlen(name) + 1; + if (cnt[1] + l > BWA_CTL_SIZE) return -1; + memcpy(shm + cnt[1], &l_mem, 8); + memcpy(shm + cnt[1] + 8, name, l - 8); + cnt[1] += l; ++cnt[0]; + if (ftruncate(shmid, l_mem) < 0) return -1; + mem = mmap(0, l_mem, PROT_READ|PROT_WRITE, MAP_SHARED, shmid, 0); + + // write to share mem + + // fmt + x = sizeof(FMTIndex); + memcpy(mem, &fmt, x); k = x; + sprintf(fn, "%s.fmt", idx_prefix); + fp = xopen(fn, "rb"); + err_fseek(fp, sizeof(bwtint_t) * 6 + 3, SEEK_SET); + x = fmt.bwt_size * 4; + fread_fix(fp, x, mem + k); k += x; + err_fclose(fp); + // sa + sprintf(fn, "%s.bytesa", idx_prefix); + fp = xopen(fn, "rb"); + err_fseek(fp, sizeof(bwtint_t) * 7, SEEK_SET); + x = SA_BYTES(fmt.n_sa); + fread_fix(fp, x, mem + k); k += x; + err_fclose(fp); + // kmer hash + sprintf(fn, "%s.kmer", idx_prefix); + fp = xopen(fn, "rb"); + x = (1 << (10 << 1)) * sizeof(KmerEntryArr); + fread_fix(fp, x, mem + k); k += x; + x = (1 << (11 << 1)) * sizeof(KmerEntry); + fread_fix(fp, x, mem + k); k += x; + x = (1 << (12 << 1)) * sizeof(KmerEntry); + fread_fix(fp, x, mem + k); k += x; +#if HASH_KMER_LEN > 12 + x = (1 << (13 << 1)) * sizeof(KmerEntry); + fread_fix(fp, x, mem + k); k += x; +#endif +#if HASH_KMER_LEN > 13 + x = (1 << (14 << 1)) * sizeof(KmerEntry); + fread_fix(fp, x, mem + k); k += x; +#endif + err_fclose(fp); + // bns + x = sizeof(bntseq_t); memcpy(mem + k, bns, x); k += x; + x = bns->n_holes * sizeof(bntamb1_t); memcpy(mem + k, bns->ambs, x); k += x; + x = bns->n_seqs * sizeof(bntann1_t); memcpy(mem + k, bns->anns, x); k += x; + for (i = 0; i < bns->n_seqs; ++i) { + x = strlen(bns->anns[i].name) + 1; memcpy(mem + k, bns->anns[i].name, x); k += x; + x = strlen(bns->anns[i].anno) + 1; memcpy(mem + k, bns->anns[i].anno, x); k += x; + } + // pac + x = bns->l_pac / 4 + 1; + err_fread_noeof(mem + k, 1, x, bns->fp_pac); k += x;// concatenated 2-bit encoded sequence + err_fclose(bns->fp_pac); + + fprintf(stderr, "k: %ld\n", k); + + return 0; +} + int main_shm(int argc, char *argv[]) { int c, to_list = 0, to_drop = 0, ret = 0, useERT = 0; - char *tmpfn = 0; char shm_prefix[PATH_MAX]; - while ((c = getopt(argc, argv, "ldf:Z")) >= 0) + while ((c = getopt(argc, argv, "ldZ")) >= 0) { if (c == 'l') to_list = 1; else if (c == 'd') to_drop = 1; - else if (c == 'f') tmpfn = optarg; else if (c == 'Z') useERT = 1; } if (optind == argc && !to_list && !to_drop) { fprintf(stderr, "\nUsage: fastbwa shm [-d|-l] [-f tmpFile] [idxbase]\n\n"); fprintf(stderr, "Options: -d destroy all indices in shared memory\n"); fprintf(stderr, " -l list names of indices in shared memory\n"); - fprintf(stderr, " -f FILE temporary file to reduce peak memory\n"); fprintf(stderr, " -Z use Ert as seeding index\n\n"); return 1; } @@ -259,16 +527,31 @@ int main_shm(int argc, char *argv[]) sprintf(shm_prefix, "%s", argv[optind]); if (bwa_shm_test(shm_prefix) == 0) { +#if 0 bwaidx_t *idx; if (useERT) idx = bwa_ertidx_load_from_disk(argv[optind]); else idx = bwa_idx_load_from_disk(argv[optind], BWA_IDX_BNS | BWA_IDX_PAC | BWA_IDX_FMT); - if (bwa_shm_stage(idx, shm_prefix, useERT, tmpfn) < 0) { + if (bwa_shm_stage(idx, shm_prefix, useERT) < 0) { fprintf(stderr, "[E::%s] failed to stage the index in shared memory\n", __func__); ret = 1; } bwa_idx_destroy(idx); +#else + if (useERT) { + if (bwa_shm_stage_ert(argv[optind]) < 0) { + fprintf(stderr, "[E::%s] failed to stage the index in shared memory\n", __func__); + ret = 1; + } + } + else { + if (bwa_shm_stage_fmt(argv[optind]) < 0) { + fprintf(stderr, "[E::%s] failed to stage the index in shared memory\n", __func__); + ret = 1; + } + } +#endif } else fprintf(stderr, "[M::%s] index '%s' is already in shared memory\n", __func__, argv[optind]); diff --git a/bwtindex.c b/bwtindex.c index 0ff19b9..eadd6b8 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -146,6 +146,57 @@ int bwa_pac2bwt(int argc, char *argv[]) // the "pac2bwt" command; IMPORTANT: bwt return 0; } +int bwa_pac2bref(int argc, char *argv[]) +{ + FILE *fp_pac, *fp_bref; + uint64_t pac_size, pac_len, seq_len, bref_size; + int64_t i; + uint8_t *pac_buf, *bref_buf; + uint8_t c; + char pac_file_name[PATH_MAX]; + char binary_ref_name[PATH_MAX]; + if (optind + 1 > argc) + { + fprintf(stderr, "Usage: fastbwa pac2ref \n"); + return 1; + } + sprintf(pac_file_name, "%s.pac", argv[optind]); + sprintf(binary_ref_name, "%s.ert.0123", argv[optind]); + + fp_pac = xopen(pac_file_name, "rb"); + err_fseek(fp_pac, -1, SEEK_END); + pac_len = err_ftell(fp_pac); + err_fread_noeof(&c, 1, 1, fp_pac); + seq_len = (pac_len - 1) * 4 + (int)c; + + err_fseek(fp_pac, 0, SEEK_SET); + pac_size = (seq_len >> 2) + ((seq_len & 3) == 0 ? 0 : 1); + pac_buf = (uint8_t *)calloc(pac_size, 1); + err_fread_noeof(pac_buf, 1, pac_size, fp_pac); + err_fclose(fp_pac); + + bref_size = seq_len * 2; + bref_buf = (uint8_t *)malloc(bref_size); + for (i = 0; i < seq_len; ++i) { + int nt = pac_buf[i >> 2] >> ((3 - (i & 3)) << 1) & 3; + bref_buf[i] = nt; + } + + for (i = seq_len - 1; i >= 0; --i) { + int64_t j = bref_size - i - 1; + bref_buf[j] = 3 - bref_buf[i]; + } + + fp_bref = xopen(binary_ref_name, "wb"); + err_fwrite(bref_buf, sizeof(uint8_t), bref_size, fp_bref); + + free(bref_buf); + free(pac_buf); + err_fclose(fp_bref); + + return 0; +} + #define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3) void bwt_bwtupdate_core(bwt_t *bwt) diff --git a/ertseeding.c b/ertseeding.c index 49d59c6..0bc3645 100644 --- a/ertseeding.c +++ b/ertseeding.c @@ -3031,7 +3031,7 @@ void get_seeds_prefix(index_aux_t* iaux, read_aux_t* raux, mem_v* smems, u64v* h sh.prev_prev_pivot = sh.prev_pivot; sh.prev_pivot = rm.start; memset(raux->lep, 0, 5 * sizeof(uint64_t)); - break; // zzh + // break; // zzh } #ifdef PRINT_SMEM ks_introsort(mem_smem_sort_lt_ert, smems->n, smems->a); // Sort SMEMs based on start pos in read. For DEBUG. @@ -3167,7 +3167,7 @@ void get_seeds(index_aux_t* iaux, read_aux_t* raux, mem_v* smems, u64v* hits) { sh.prev_prev_pivot = sh.prev_pivot; sh.prev_pivot = rm.start; memset(raux->lep, 0, 5 * sizeof(uint64_t)); - break; // zzh + // break; // zzh } #ifdef PRINT_SMEM ks_introsort(mem_smem_sort_lt_ert, smems->n, smems->a); // Sort SMEMs based on start pos in read. For DEBUG. diff --git a/fastmap.c b/fastmap.c index 82fcb42..eaa3c07 100644 --- a/fastmap.c +++ b/fastmap.c @@ -78,7 +78,6 @@ typedef struct { long read_idx; long calc_idx; long write_idx; - int useERT; } ktp_aux_t; // read @@ -636,8 +635,7 @@ int main_mem(int argc, char *argv[]) } //fprintf(stderr, "%ld %ld %ld %ld %ld\n", aux.idx->fmt->L2[0], aux.idx->fmt->L2[1], aux.idx->fmt->L2[2], aux.idx->fmt->L2[3], aux.idx->fmt->L2[4]); - aux.useERT = useERT; - aux.w = init_mem_worker(opt, aux.idx->fmt, aux.idx->bns, aux.idx->pac); + aux.w = init_mem_worker(opt, aux.idx->fmt, aux.idx->ert, aux.idx->bns, aux.idx->pac, useERT); aux.data = calloc(2, sizeof(ktp_data_t)); bwa_print_sam_hdr(aux.idx->bns, hdr_line); diff --git a/fmt_idx.h b/fmt_idx.h index cb8436a..26b4c4d 100644 --- a/fmt_idx.h +++ b/fmt_idx.h @@ -117,4 +117,5 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in bwtint_t fmt_sa(const FMTIndex *fmt, bwtint_t k); bwtint_t fmt_sa_offset(const FMTIndex *fmt, bwtint_t k); +void fmt_gen_cnt_occ(FMTIndex *fmt); #endif \ No newline at end of file diff --git a/ksw.c b/ksw.c index 02f4f19..7f9cdeb 100644 --- a/ksw.c +++ b/ksw.c @@ -159,7 +159,7 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del _mm_store_si128(Hmax + i, zero); } // the core loop - PROF_START(loop); +// PROF_START(loop); for (i = 0; i < tlen; ++i) { int j, k, imax; __m128i e, h, t, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector @@ -223,8 +223,8 @@ end_loop16: } S = H1; H1 = H0; H0 = S; // swap H0 and H1 } - PROF_END(gprof[G_KSW_LOOP], loop); - PROF_START(end_loop); +// PROF_END(gprof[G_KSW_LOOP], loop); +// PROF_START(end_loop); r.score = gmax + q->shift < 255? gmax : 255; r.te = te; if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score @@ -245,7 +245,7 @@ end_loop16: } } free(b); - PROF_END(gprof[G_KSW_END_LOOP], end_loop); +// PROF_END(gprof[G_KSW_END_LOOP], end_loop); return r; } diff --git a/main.c b/main.c index ca98d99..0ba5df8 100644 --- a/main.c +++ b/main.c @@ -35,6 +35,7 @@ int bwa_fa2pac(int argc, char *argv[]); int bwa_pac2bwt(int argc, char *argv[]); +int bwa_pac2bref(int argc, char *argv[]); int bwa_bwtupdate(int argc, char *argv[]); int bwa_bwt2sa(int argc, char *argv[]); int bwa_bwt2bytesa(int argc, char *argv[]); @@ -78,6 +79,7 @@ static int usage() fprintf(stderr, " fa2pac convert FASTA to PAC format\n"); fprintf(stderr, " pac2bwt generate BWT from PAC\n"); fprintf(stderr, " pac2bwtgen alternative algorithm for generating BWT\n"); + fprintf(stderr, " pac2bref generate binary reference from PAC\n"); fprintf(stderr, " bwtupdate update .bwt to the new format\n"); fprintf(stderr, " bwt2sa generate SA from BWT and Occ\n"); fprintf(stderr, " bwt2bytesa generate SA(using byte array) from BWT and Occ\n"); @@ -107,6 +109,7 @@ int main(int argc, char *argv[]) if (strcmp(argv[1], "fa2pac") == 0) ret = bwa_fa2pac(argc-1, argv+1); else if (strcmp(argv[1], "pac2bwt") == 0) ret = bwa_pac2bwt(argc-1, argv+1); else if (strcmp(argv[1], "pac2bwtgen") == 0) ret = bwt_bwtgen_main(argc-1, argv+1); + else if (strcmp(argv[1], "pac2bref") == 0) ret = bwa_pac2bref(argc-1, argv+1); else if (strcmp(argv[1], "bwtupdate") == 0) ret = bwa_bwtupdate(argc-1, argv+1); else if (strcmp(argv[1], "bwt2sa") == 0) ret = bwa_bwt2sa(argc-1, argv+1); else if (strcmp(argv[1], "bwt2bytesa") == 0) ret = bwa_bwt2bytesa(argc-1, argv+1); diff --git a/profiling.c b/profiling.c index 0689c79..f77a238 100644 --- a/profiling.c +++ b/profiling.c @@ -87,7 +87,7 @@ int display_stats(int nthreads) find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg); fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg); find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg); - fprintf(stderr, "time_fil_chain: %0.2lf s\n", avg); + fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg); find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg); fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg); find_opt(tprof[T_SAL], nthreads, &max, &min, &avg); diff --git a/run.sh b/run.sh index 60d5088..e850b9d 100755 --- a/run.sh +++ b/run.sh @@ -1,17 +1,19 @@ -thread=1 +thread=64 ## d1 k18<=4 89% #n_r1=~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq #n_r2=~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq #n_r1=~/data/fastq/dataset/na12878_wes_144/1w_1.fq #n_r2=~/data/fastq/dataset/na12878_wes_144/1w_2.fq -n_r1=~/data/fastq/dataset/na12878_wes_144/ss_1.fq -n_r2=~/data/fastq/dataset/na12878_wes_144/ss_2.fq +#n_r1=~/data/fastq/dataset/na12878_wes_144/ss_1.fq +#n_r2=~/data/fastq/dataset/na12878_wes_144/ss_2.fq #n_r1=~/data/fastq/dataset/na12878_wes_144/45m_r1.fq #n_r2=~/data/fastq/dataset/na12878_wes_144/45m_r2.fq #n_r1=~/data/fastq/dataset/na12878_wes_144/45mr1.fq.gz #n_r2=~/data/fastq/dataset/na12878_wes_144/45mr2.fq.gz ## d2 <= 4 87% +#n_r1=~/data/fastq/dataset/na12878_wgs_101/na12878_r1.fq +#n_r2=~/data/fastq/dataset/na12878_wgs_101/na12878_r2.fq #n_r1=~/data/fastq/dataset/na12878_wgs_101/s_1.fq #n_r2=~/data/fastq/dataset/na12878_wgs_101/s_2.fq #n_r1=~/data/fastq/dataset/na12878_wgs_101/45m_r1.fq @@ -31,8 +33,8 @@ n_r2=~/data/fastq/dataset/na12878_wes_144/ss_2.fq #n_r2=~/data/fastq/dataset/zy_wgs/45mr2.fq.gz #n_r1=~/data/fastq/dataset/zy_wgs/s_1.fq #n_r2=~/data/fastq/dataset/zy_wgs/s_2.fq -#n_r1=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_1.fq -#n_r2=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_2.fq +n_r1=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_1.fq +n_r2=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_2.fq reference=~/data1/fmt_ref/human_g1k_v37_decoy.fasta #reference=~/reference/bwa/human_g1k_v37_decoy.fasta @@ -40,11 +42,12 @@ reference=~/data1/fmt_ref/human_g1k_v37_decoy.fasta #out=~/data1/out-u8-1.sam #out=~/data1/out-i16.sam -out=~/data1/fmt-out.sam +#out=~/data1/fmt-out.sam +out=~/data/fastbwa.ert.sam #out=/dev/null time ./fastbwa mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ - -Z $reference \ + $reference \ $n_r1 \ $n_r2 \ - -o $out -2 + -o $out -2 -Z