Merge branch 'master' into master_fixes. Merged up to r389.

Conflicts:
	bwamem.c
	kopen.c
This commit is contained in:
Rob Davies 2013-04-29 12:09:30 +01:00
commit e88529687f
7 changed files with 46 additions and 22 deletions

3
NEWS
View File

@ -2,8 +2,7 @@ Release 0.7.4 (23 April, 2013)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This is a bugfix release. Most of bugs are considered to be minor which only This is a bugfix release. Most of bugs are considered to be minor which only
very rarely. occur very rarely.
ccur
* Bugfix: wrong CIGAR when a query sequence bridges three or more target * Bugfix: wrong CIGAR when a query sequence bridges three or more target
sequences. This only happens when aligning reads to short assembly contigs. sequences. This only happens when aligning reads to short assembly contigs.

View File

@ -951,7 +951,7 @@ static void *worker2(void *data)
return 0; return 0;
} }
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs) void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
{ {
int i; int i;
worker_t *w; worker_t *w;
@ -967,29 +967,30 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
p->seqs = seqs; p->regs = regs; p->seqs = seqs; p->regs = regs;
p->pes = &pes[0]; p->pes = &pes[0];
} }
#ifdef HAVE_PTHREAD #ifdef HAVE_PTHREAD
if (opt->n_threads == 1) { if (opt->n_threads == 1) {
#endif
worker1(w); worker1(w);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); if (opt->flag&MEM_F_PE) { // paired-end mode
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
else mem_pestat(opt, bns->l_pac, n, regs, pes); // otherwise, infer the insert size distribution from data
}
worker2(w); worker2(w);
#ifdef HAVE_PTHREAD
} else { } else {
pthread_t *tid; pthread_t *tid;
tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t)); tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t));
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]); for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); if (opt->flag&MEM_F_PE) {
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t));
else mem_pestat(opt, bns->l_pac, n, regs, pes);
}
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]); for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
free(tid); free(tid);
} }
#else
worker1(w);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
worker2(w);
#endif #endif
for (i = 0; i < n; ++i) {
err_fputs(seqs[i].sam, stdout);
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
}
free(regs); free(w); free(regs); free(w);
} }

View File

@ -57,8 +57,9 @@ typedef struct {
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v; typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
typedef struct { typedef struct {
int low, high, failed; int low, high; // lower and upper bounds within which a read pair is considered to be properly paired
double avg, std; int failed; // non-zero if the orientation is not supported by sufficient data
double avg, std; // mean and stddev of the insert size distribution
} mem_pestat_t; } mem_pestat_t;
typedef struct { // This struct is only used for the convenience of API. typedef struct { // This struct is only used for the convenience of API.
@ -103,8 +104,10 @@ extern "C" {
* @param pac 2-bit encoded reference * @param pac 2-bit encoded reference
* @param n number of query sequences * @param n number of query sequences
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call * @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
*/ */
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs); void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
/** /**
* Find the aligned regions for one query sequence * Find the aligned regions for one query sequence

View File

@ -181,6 +181,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rb = re - tle; rlen = tle; rb = re - tle; rlen = tle;
seq_reverse(len, seq, 0); seq_reverse(len, seq, 0);
seq_reverse(rlen, rseq, 0); seq_reverse(rlen, rseq, 0);
if (rlen == 0) goto refine_gapped_err;
ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32);
if (qle < len) { // write soft clip if (qle < len) { // write soft clip
cigar = xrealloc(cigar, (*n_cigar + 1) * 4); cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
@ -195,6 +196,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, &gtle, &gscore, 0); ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len; if (gscore > 0) tle = gtle, qle = len;
re = rb + tle; rlen = tle; re = rb + tle; rlen = tle;
if (rlen == 0) goto refine_gapped_err;
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
if (qle < len) { if (qle < len) {
cigar = xrealloc(cigar, (*n_cigar + 1) * 4); cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
@ -209,6 +211,11 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4)); cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
free(rseq); free(rseq);
return cigar; return cigar;
refine_gapped_err:
free(rseq);
*n_cigar = 0;
return 0;
} }
char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq, char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
@ -320,6 +327,7 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
} }
if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue; if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand); s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand);
if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH;
} }
// generate MD tag // generate MD tag
str = (kstring_t*)xcalloc(1, sizeof(kstring_t)); str = (kstring_t*)xcalloc(1, sizeof(kstring_t));
@ -475,6 +483,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
for (i = 0; i < p->n_multi; ++i) { for (i = 0; i < p->n_multi; ++i) {
bwt_multi1_t *q = p->multi + i; bwt_multi1_t *q = p->multi + i;
int k; int k;
if (q->cigar == 0) continue;
j = pos_end_multi(q, p->len) - q->pos; j = pos_end_multi(q, p->len) - q->pos;
nn = bns_cnt_ambi(bns, q->pos, j, &seqid); nn = bns_cnt_ambi(bns, q->pos, j, &seqid);
err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+', err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',

View File

@ -91,9 +91,12 @@ int main_mem(int argc, char *argv[])
bwa_fill_scmat(opt->a, opt->b, opt->mat); bwa_fill_scmat(opt->a, opt->b, opt->mat);
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
bwa_print_sam_hdr(idx->bns, rg_line);
ko = kopen(argv[optind + 1], &fd); ko = kopen(argv[optind + 1], &fd);
if (ko == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]);
return 1;
}
fp = gzdopen(fd, "r"); fp = gzdopen(fd, "r");
ks = kseq_init(fp); ks = kseq_init(fp);
if (optind + 2 < argc) { if (optind + 2 < argc) {
@ -102,11 +105,16 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, "[W::%s] when '-p' is in use, the second query file will be ignored.\n", __func__); fprintf(stderr, "[W::%s] when '-p' is in use, the second query file will be ignored.\n", __func__);
} else { } else {
ko2 = kopen(argv[optind + 2], &fd2); ko2 = kopen(argv[optind + 2], &fd2);
if (ko2 == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]);
return 1;
}
fp2 = gzdopen(fd2, "r"); fp2 = gzdopen(fd2, "r");
ks2 = kseq_init(fp2); ks2 = kseq_init(fp2);
opt->flag |= MEM_F_PE; opt->flag |= MEM_F_PE;
} }
} }
bwa_print_sam_hdr(idx->bns, rg_line);
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) { while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
int64_t size = 0; int64_t size = 0;
if ((opt->flag & MEM_F_PE) && (n&1) == 1) { if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
@ -121,7 +129,11 @@ int main_mem(int argc, char *argv[])
for (i = 0; i < n; ++i) size += seqs[i].l_seq; for (i = 0; i < n; ++i) size += seqs[i].l_seq;
if (bwa_verbose >= 3) if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size); fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs); mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0);
for (i = 0; i < n; ++i) {
err_fputs(seqs[i].sam, stdout);
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
}
free(seqs); free(seqs);
} }

View File

@ -319,14 +319,14 @@ void *kopen(const char *fn, int *_fd)
#else #else
*_fd = open(fn, O_RDONLY); *_fd = open(fn, O_RDONLY);
#endif #endif
if (*_fd) { if (*_fd >= 0) {
aux = xcalloc(1, sizeof(koaux_t)); aux = xcalloc(1, sizeof(koaux_t));
aux->type = KO_FILE; aux->type = KO_FILE;
aux->fd = *_fd; aux->fd = *_fd;
} }
} }
} }
*_fd = aux->fd; if (aux) *_fd = aux->fd;
return aux; return aux;
} }

4
main.c
View File

@ -3,7 +3,7 @@
#include "utils.h" #include "utils.h"
#ifndef PACKAGE_VERSION #ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.4-r385" #define PACKAGE_VERSION "0.7.4-r389-beta"
#endif #endif
int bwa_fa2pac(int argc, char *argv[]); int bwa_fa2pac(int argc, char *argv[]);
@ -92,5 +92,5 @@ int main(int argc, char *argv[])
fprintf(stderr, " %s", argv[i]); fprintf(stderr, " %s", argv[i]);
fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime()); fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime());
} }
return 0; return ret;
} }