Merge branch 'master' into master_fixes. Merged up to r389.
Conflicts: bwamem.c kopen.c
This commit is contained in:
commit
e88529687f
3
NEWS
3
NEWS
|
|
@ -2,8 +2,7 @@ Release 0.7.4 (23 April, 2013)
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
This is a bugfix release. Most of bugs are considered to be minor which only
|
This is a bugfix release. Most of bugs are considered to be minor which only
|
||||||
very rarely.
|
occur very rarely.
|
||||||
ccur
|
|
||||||
|
|
||||||
* Bugfix: wrong CIGAR when a query sequence bridges three or more target
|
* Bugfix: wrong CIGAR when a query sequence bridges three or more target
|
||||||
sequences. This only happens when aligning reads to short assembly contigs.
|
sequences. This only happens when aligning reads to short assembly contigs.
|
||||||
|
|
|
||||||
23
bwamem.c
23
bwamem.c
|
|
@ -951,7 +951,7 @@ static void *worker2(void *data)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs)
|
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
worker_t *w;
|
worker_t *w;
|
||||||
|
|
@ -967,29 +967,30 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
|
||||||
p->seqs = seqs; p->regs = regs;
|
p->seqs = seqs; p->regs = regs;
|
||||||
p->pes = &pes[0];
|
p->pes = &pes[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_PTHREAD
|
#ifdef HAVE_PTHREAD
|
||||||
if (opt->n_threads == 1) {
|
if (opt->n_threads == 1) {
|
||||||
|
#endif
|
||||||
worker1(w);
|
worker1(w);
|
||||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
if (opt->flag&MEM_F_PE) { // paired-end mode
|
||||||
|
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
|
||||||
|
else mem_pestat(opt, bns->l_pac, n, regs, pes); // otherwise, infer the insert size distribution from data
|
||||||
|
}
|
||||||
worker2(w);
|
worker2(w);
|
||||||
|
#ifdef HAVE_PTHREAD
|
||||||
} else {
|
} else {
|
||||||
pthread_t *tid;
|
pthread_t *tid;
|
||||||
tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t));
|
tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t));
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
|
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
if (opt->flag&MEM_F_PE) {
|
||||||
|
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t));
|
||||||
|
else mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||||
|
}
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
|
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||||
free(tid);
|
free(tid);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
worker1(w);
|
|
||||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
|
||||||
worker2(w);
|
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < n; ++i) {
|
|
||||||
err_fputs(seqs[i].sam, stdout);
|
|
||||||
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
|
||||||
}
|
|
||||||
free(regs); free(w);
|
free(regs); free(w);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
9
bwamem.h
9
bwamem.h
|
|
@ -57,8 +57,9 @@ typedef struct {
|
||||||
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
|
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int low, high, failed;
|
int low, high; // lower and upper bounds within which a read pair is considered to be properly paired
|
||||||
double avg, std;
|
int failed; // non-zero if the orientation is not supported by sufficient data
|
||||||
|
double avg, std; // mean and stddev of the insert size distribution
|
||||||
} mem_pestat_t;
|
} mem_pestat_t;
|
||||||
|
|
||||||
typedef struct { // This struct is only used for the convenience of API.
|
typedef struct { // This struct is only used for the convenience of API.
|
||||||
|
|
@ -103,8 +104,10 @@ extern "C" {
|
||||||
* @param pac 2-bit encoded reference
|
* @param pac 2-bit encoded reference
|
||||||
* @param n number of query sequences
|
* @param n number of query sequences
|
||||||
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
|
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
|
||||||
|
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
|
||||||
|
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
|
||||||
*/
|
*/
|
||||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);
|
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the aligned regions for one query sequence
|
* Find the aligned regions for one query sequence
|
||||||
|
|
|
||||||
9
bwase.c
9
bwase.c
|
|
@ -181,6 +181,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
||||||
rb = re - tle; rlen = tle;
|
rb = re - tle; rlen = tle;
|
||||||
seq_reverse(len, seq, 0);
|
seq_reverse(len, seq, 0);
|
||||||
seq_reverse(rlen, rseq, 0);
|
seq_reverse(rlen, rseq, 0);
|
||||||
|
if (rlen == 0) goto refine_gapped_err;
|
||||||
ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32);
|
ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32);
|
||||||
if (qle < len) { // write soft clip
|
if (qle < len) { // write soft clip
|
||||||
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
|
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
|
||||||
|
|
@ -195,6 +196,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
||||||
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, >le, &gscore, 0);
|
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, >le, &gscore, 0);
|
||||||
if (gscore > 0) tle = gtle, qle = len;
|
if (gscore > 0) tle = gtle, qle = len;
|
||||||
re = rb + tle; rlen = tle;
|
re = rb + tle; rlen = tle;
|
||||||
|
if (rlen == 0) goto refine_gapped_err;
|
||||||
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
|
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
|
||||||
if (qle < len) {
|
if (qle < len) {
|
||||||
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
|
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
|
||||||
|
|
@ -209,6 +211,11 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
||||||
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
||||||
free(rseq);
|
free(rseq);
|
||||||
return cigar;
|
return cigar;
|
||||||
|
|
||||||
|
refine_gapped_err:
|
||||||
|
free(rseq);
|
||||||
|
*n_cigar = 0;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
|
char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
|
||||||
|
|
@ -320,6 +327,7 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
|
||||||
}
|
}
|
||||||
if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
|
if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
|
||||||
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand);
|
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand);
|
||||||
|
if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH;
|
||||||
}
|
}
|
||||||
// generate MD tag
|
// generate MD tag
|
||||||
str = (kstring_t*)xcalloc(1, sizeof(kstring_t));
|
str = (kstring_t*)xcalloc(1, sizeof(kstring_t));
|
||||||
|
|
@ -475,6 +483,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
||||||
for (i = 0; i < p->n_multi; ++i) {
|
for (i = 0; i < p->n_multi; ++i) {
|
||||||
bwt_multi1_t *q = p->multi + i;
|
bwt_multi1_t *q = p->multi + i;
|
||||||
int k;
|
int k;
|
||||||
|
if (q->cigar == 0) continue;
|
||||||
j = pos_end_multi(q, p->len) - q->pos;
|
j = pos_end_multi(q, p->len) - q->pos;
|
||||||
nn = bns_cnt_ambi(bns, q->pos, j, &seqid);
|
nn = bns_cnt_ambi(bns, q->pos, j, &seqid);
|
||||||
err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
|
err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
|
||||||
|
|
|
||||||
16
fastmap.c
16
fastmap.c
|
|
@ -91,9 +91,12 @@ int main_mem(int argc, char *argv[])
|
||||||
|
|
||||||
bwa_fill_scmat(opt->a, opt->b, opt->mat);
|
bwa_fill_scmat(opt->a, opt->b, opt->mat);
|
||||||
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
|
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
|
||||||
bwa_print_sam_hdr(idx->bns, rg_line);
|
|
||||||
|
|
||||||
ko = kopen(argv[optind + 1], &fd);
|
ko = kopen(argv[optind + 1], &fd);
|
||||||
|
if (ko == 0) {
|
||||||
|
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
fp = gzdopen(fd, "r");
|
fp = gzdopen(fd, "r");
|
||||||
ks = kseq_init(fp);
|
ks = kseq_init(fp);
|
||||||
if (optind + 2 < argc) {
|
if (optind + 2 < argc) {
|
||||||
|
|
@ -102,11 +105,16 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, "[W::%s] when '-p' is in use, the second query file will be ignored.\n", __func__);
|
fprintf(stderr, "[W::%s] when '-p' is in use, the second query file will be ignored.\n", __func__);
|
||||||
} else {
|
} else {
|
||||||
ko2 = kopen(argv[optind + 2], &fd2);
|
ko2 = kopen(argv[optind + 2], &fd2);
|
||||||
|
if (ko2 == 0) {
|
||||||
|
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
fp2 = gzdopen(fd2, "r");
|
fp2 = gzdopen(fd2, "r");
|
||||||
ks2 = kseq_init(fp2);
|
ks2 = kseq_init(fp2);
|
||||||
opt->flag |= MEM_F_PE;
|
opt->flag |= MEM_F_PE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
bwa_print_sam_hdr(idx->bns, rg_line);
|
||||||
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
|
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
|
||||||
int64_t size = 0;
|
int64_t size = 0;
|
||||||
if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
|
if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
|
||||||
|
|
@ -121,7 +129,11 @@ int main_mem(int argc, char *argv[])
|
||||||
for (i = 0; i < n; ++i) size += seqs[i].l_seq;
|
for (i = 0; i < n; ++i) size += seqs[i].l_seq;
|
||||||
if (bwa_verbose >= 3)
|
if (bwa_verbose >= 3)
|
||||||
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
|
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
|
||||||
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs);
|
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0);
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
err_fputs(seqs[i].sam, stdout);
|
||||||
|
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
||||||
|
}
|
||||||
free(seqs);
|
free(seqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
4
kopen.c
4
kopen.c
|
|
@ -319,14 +319,14 @@ void *kopen(const char *fn, int *_fd)
|
||||||
#else
|
#else
|
||||||
*_fd = open(fn, O_RDONLY);
|
*_fd = open(fn, O_RDONLY);
|
||||||
#endif
|
#endif
|
||||||
if (*_fd) {
|
if (*_fd >= 0) {
|
||||||
aux = xcalloc(1, sizeof(koaux_t));
|
aux = xcalloc(1, sizeof(koaux_t));
|
||||||
aux->type = KO_FILE;
|
aux->type = KO_FILE;
|
||||||
aux->fd = *_fd;
|
aux->fd = *_fd;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*_fd = aux->fd;
|
if (aux) *_fd = aux->fd;
|
||||||
return aux;
|
return aux;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
4
main.c
4
main.c
|
|
@ -3,7 +3,7 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#ifndef PACKAGE_VERSION
|
#ifndef PACKAGE_VERSION
|
||||||
#define PACKAGE_VERSION "0.7.4-r385"
|
#define PACKAGE_VERSION "0.7.4-r389-beta"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int bwa_fa2pac(int argc, char *argv[]);
|
int bwa_fa2pac(int argc, char *argv[]);
|
||||||
|
|
@ -92,5 +92,5 @@ int main(int argc, char *argv[])
|
||||||
fprintf(stderr, " %s", argv[i]);
|
fprintf(stderr, " %s", argv[i]);
|
||||||
fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime());
|
fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime());
|
||||||
}
|
}
|
||||||
return 0;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue