diff --git a/NEWS b/NEWS index 42e7c79..86f4114 100644 --- a/NEWS +++ b/NEWS @@ -2,8 +2,7 @@ Release 0.7.4 (23 April, 2013) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is a bugfix release. Most of bugs are considered to be minor which only - very rarely. - ccur +occur very rarely. * Bugfix: wrong CIGAR when a query sequence bridges three or more target sequences. This only happens when aligning reads to short assembly contigs. diff --git a/bwamem.c b/bwamem.c index 8c2ba9c..9f3aa9b 100644 --- a/bwamem.c +++ b/bwamem.c @@ -951,7 +951,7 @@ static void *worker2(void *data) return 0; } -void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs) +void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0) { int i; worker_t *w; @@ -967,29 +967,30 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn p->seqs = seqs; p->regs = regs; p->pes = &pes[0]; } + #ifdef HAVE_PTHREAD if (opt->n_threads == 1) { +#endif worker1(w); - if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); + if (opt->flag&MEM_F_PE) { // paired-end mode + if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0 + else mem_pestat(opt, bns->l_pac, n, regs, pes); // otherwise, infer the insert size distribution from data + } worker2(w); +#ifdef HAVE_PTHREAD } else { pthread_t *tid; tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t)); for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]); for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); - if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); + if (opt->flag&MEM_F_PE) { + if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); + else mem_pestat(opt, bns->l_pac, n, regs, pes); + } for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]); for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); free(tid); } -#else - worker1(w); - if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); - worker2(w); #endif - for (i = 0; i < n; ++i) { - err_fputs(seqs[i].sam, stdout); - free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam); - } free(regs); free(w); } diff --git a/bwamem.h b/bwamem.h index 40f47f8..76be8e3 100644 --- a/bwamem.h +++ b/bwamem.h @@ -57,8 +57,9 @@ typedef struct { typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v; typedef struct { - int low, high, failed; - double avg, std; + int low, high; // lower and upper bounds within which a read pair is considered to be properly paired + int failed; // non-zero if the orientation is not supported by sufficient data + double avg, std; // mean and stddev of the insert size distribution } mem_pestat_t; typedef struct { // This struct is only used for the convenience of API. @@ -103,8 +104,10 @@ extern "C" { * @param pac 2-bit encoded reference * @param n number of query sequences * @param seqs query sequences; $seqs[i].seq/sam to be modified after the call + * @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements, + * corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info. */ - void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs); + void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0); /** * Find the aligned regions for one query sequence diff --git a/bwase.c b/bwase.c index 85efcbd..41ab175 100644 --- a/bwase.c +++ b/bwase.c @@ -181,6 +181,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l rb = re - tle; rlen = tle; seq_reverse(len, seq, 0); seq_reverse(rlen, rseq, 0); + if (rlen == 0) goto refine_gapped_err; ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); if (qle < len) { // write soft clip cigar = xrealloc(cigar, (*n_cigar + 1) * 4); @@ -195,6 +196,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, >le, &gscore, 0); if (gscore > 0) tle = gtle, qle = len; re = rb + tle; rlen = tle; + if (rlen == 0) goto refine_gapped_err; ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension if (qle < len) { cigar = xrealloc(cigar, (*n_cigar + 1) * 4); @@ -209,6 +211,11 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4)); free(rseq); return cigar; + +refine_gapped_err: + free(rseq); + *n_cigar = 0; + return 0; } char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq, @@ -320,6 +327,7 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t } if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue; s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand); + if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH; } // generate MD tag str = (kstring_t*)xcalloc(1, sizeof(kstring_t)); @@ -475,6 +483,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in for (i = 0; i < p->n_multi; ++i) { bwt_multi1_t *q = p->multi + i; int k; + if (q->cigar == 0) continue; j = pos_end_multi(q, p->len) - q->pos; nn = bns_cnt_ambi(bns, q->pos, j, &seqid); err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+', diff --git a/fastmap.c b/fastmap.c index 7ff73cb..d0651f9 100644 --- a/fastmap.c +++ b/fastmap.c @@ -91,9 +91,12 @@ int main_mem(int argc, char *argv[]) bwa_fill_scmat(opt->a, opt->b, opt->mat); if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak - bwa_print_sam_hdr(idx->bns, rg_line); ko = kopen(argv[optind + 1], &fd); + if (ko == 0) { + if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]); + return 1; + } fp = gzdopen(fd, "r"); ks = kseq_init(fp); if (optind + 2 < argc) { @@ -102,11 +105,16 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "[W::%s] when '-p' is in use, the second query file will be ignored.\n", __func__); } else { ko2 = kopen(argv[optind + 2], &fd2); + if (ko2 == 0) { + if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]); + return 1; + } fp2 = gzdopen(fd2, "r"); ks2 = kseq_init(fp2); opt->flag |= MEM_F_PE; } } + bwa_print_sam_hdr(idx->bns, rg_line); while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) { int64_t size = 0; if ((opt->flag & MEM_F_PE) && (n&1) == 1) { @@ -121,7 +129,11 @@ int main_mem(int argc, char *argv[]) for (i = 0; i < n; ++i) size += seqs[i].l_seq; if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size); - mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs); + mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0); + for (i = 0; i < n; ++i) { + err_fputs(seqs[i].sam, stdout); + free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam); + } free(seqs); } diff --git a/kopen.c b/kopen.c index c1c43a8..82f2812 100644 --- a/kopen.c +++ b/kopen.c @@ -319,14 +319,14 @@ void *kopen(const char *fn, int *_fd) #else *_fd = open(fn, O_RDONLY); #endif - if (*_fd) { + if (*_fd >= 0) { aux = xcalloc(1, sizeof(koaux_t)); aux->type = KO_FILE; aux->fd = *_fd; } } } - *_fd = aux->fd; + if (aux) *_fd = aux->fd; return aux; } diff --git a/main.c b/main.c index 502af35..cc1a4b4 100644 --- a/main.c +++ b/main.c @@ -3,7 +3,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.4-r385" +#define PACKAGE_VERSION "0.7.4-r389-beta" #endif int bwa_fa2pac(int argc, char *argv[]); @@ -92,5 +92,5 @@ int main(int argc, char *argv[]) fprintf(stderr, " %s", argv[i]); fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime()); } - return 0; + return ret; }