Merge branch 'master' into master_fixes. Merged up to r389.

Conflicts:
	bwamem.c
	kopen.c
This commit is contained in:
Rob Davies 2013-04-29 12:09:30 +01:00
commit e88529687f
7 changed files with 46 additions and 22 deletions

3
NEWS
View File

@ -2,8 +2,7 @@ Release 0.7.4 (23 April, 2013)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This is a bugfix release. Most of bugs are considered to be minor which only
very rarely.
ccur
occur very rarely.
* Bugfix: wrong CIGAR when a query sequence bridges three or more target
sequences. This only happens when aligning reads to short assembly contigs.

View File

@ -951,7 +951,7 @@ static void *worker2(void *data)
return 0;
}
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs)
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
{
int i;
worker_t *w;
@ -967,29 +967,30 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
p->seqs = seqs; p->regs = regs;
p->pes = &pes[0];
}
#ifdef HAVE_PTHREAD
if (opt->n_threads == 1) {
#endif
worker1(w);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
if (opt->flag&MEM_F_PE) { // paired-end mode
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
else mem_pestat(opt, bns->l_pac, n, regs, pes); // otherwise, infer the insert size distribution from data
}
worker2(w);
#ifdef HAVE_PTHREAD
} else {
pthread_t *tid;
tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t));
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
if (opt->flag&MEM_F_PE) {
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t));
else mem_pestat(opt, bns->l_pac, n, regs, pes);
}
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
free(tid);
}
#else
worker1(w);
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
worker2(w);
#endif
for (i = 0; i < n; ++i) {
err_fputs(seqs[i].sam, stdout);
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
}
free(regs); free(w);
}

View File

@ -57,8 +57,9 @@ typedef struct {
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
typedef struct {
int low, high, failed;
double avg, std;
int low, high; // lower and upper bounds within which a read pair is considered to be properly paired
int failed; // non-zero if the orientation is not supported by sufficient data
double avg, std; // mean and stddev of the insert size distribution
} mem_pestat_t;
typedef struct { // This struct is only used for the convenience of API.
@ -103,8 +104,10 @@ extern "C" {
* @param pac 2-bit encoded reference
* @param n number of query sequences
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
*/
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
/**
* Find the aligned regions for one query sequence

View File

@ -181,6 +181,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rb = re - tle; rlen = tle;
seq_reverse(len, seq, 0);
seq_reverse(rlen, rseq, 0);
if (rlen == 0) goto refine_gapped_err;
ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32);
if (qle < len) { // write soft clip
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
@ -195,6 +196,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len;
re = rb + tle; rlen = tle;
if (rlen == 0) goto refine_gapped_err;
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
if (qle < len) {
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
@ -209,6 +211,11 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
free(rseq);
return cigar;
refine_gapped_err:
free(rseq);
*n_cigar = 0;
return 0;
}
char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
@ -320,6 +327,7 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
}
if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand);
if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH;
}
// generate MD tag
str = (kstring_t*)xcalloc(1, sizeof(kstring_t));
@ -475,6 +483,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
for (i = 0; i < p->n_multi; ++i) {
bwt_multi1_t *q = p->multi + i;
int k;
if (q->cigar == 0) continue;
j = pos_end_multi(q, p->len) - q->pos;
nn = bns_cnt_ambi(bns, q->pos, j, &seqid);
err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',

View File

@ -91,9 +91,12 @@ int main_mem(int argc, char *argv[])
bwa_fill_scmat(opt->a, opt->b, opt->mat);
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
bwa_print_sam_hdr(idx->bns, rg_line);
ko = kopen(argv[optind + 1], &fd);
if (ko == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]);
return 1;
}
fp = gzdopen(fd, "r");
ks = kseq_init(fp);
if (optind + 2 < argc) {
@ -102,11 +105,16 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, "[W::%s] when '-p' is in use, the second query file will be ignored.\n", __func__);
} else {
ko2 = kopen(argv[optind + 2], &fd2);
if (ko2 == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]);
return 1;
}
fp2 = gzdopen(fd2, "r");
ks2 = kseq_init(fp2);
opt->flag |= MEM_F_PE;
}
}
bwa_print_sam_hdr(idx->bns, rg_line);
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
int64_t size = 0;
if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
@ -121,7 +129,11 @@ int main_mem(int argc, char *argv[])
for (i = 0; i < n; ++i) size += seqs[i].l_seq;
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs);
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0);
for (i = 0; i < n; ++i) {
err_fputs(seqs[i].sam, stdout);
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
}
free(seqs);
}

View File

@ -319,14 +319,14 @@ void *kopen(const char *fn, int *_fd)
#else
*_fd = open(fn, O_RDONLY);
#endif
if (*_fd) {
if (*_fd >= 0) {
aux = xcalloc(1, sizeof(koaux_t));
aux->type = KO_FILE;
aux->fd = *_fd;
}
}
}
*_fd = aux->fd;
if (aux) *_fd = aux->fd;
return aux;
}

4
main.c
View File

@ -3,7 +3,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.4-r385"
#define PACKAGE_VERSION "0.7.4-r389-beta"
#endif
int bwa_fa2pac(int argc, char *argv[]);
@ -92,5 +92,5 @@ int main(int argc, char *argv[])
fprintf(stderr, " %s", argv[i]);
fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime());
}
return 0;
return ret;
}