Merge branch 'master' into master_fixes. Merged up to r389.
Conflicts: bwamem.c kopen.c
This commit is contained in:
commit
e88529687f
3
NEWS
3
NEWS
|
|
@ -2,8 +2,7 @@ Release 0.7.4 (23 April, 2013)
|
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This is a bugfix release. Most of bugs are considered to be minor which only
|
||||
very rarely.
|
||||
ccur
|
||||
occur very rarely.
|
||||
|
||||
* Bugfix: wrong CIGAR when a query sequence bridges three or more target
|
||||
sequences. This only happens when aligning reads to short assembly contigs.
|
||||
|
|
|
|||
23
bwamem.c
23
bwamem.c
|
|
@ -951,7 +951,7 @@ static void *worker2(void *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs)
|
||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
|
||||
{
|
||||
int i;
|
||||
worker_t *w;
|
||||
|
|
@ -967,29 +967,30 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
|
|||
p->seqs = seqs; p->regs = regs;
|
||||
p->pes = &pes[0];
|
||||
}
|
||||
|
||||
#ifdef HAVE_PTHREAD
|
||||
if (opt->n_threads == 1) {
|
||||
#endif
|
||||
worker1(w);
|
||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
if (opt->flag&MEM_F_PE) { // paired-end mode
|
||||
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
|
||||
else mem_pestat(opt, bns->l_pac, n, regs, pes); // otherwise, infer the insert size distribution from data
|
||||
}
|
||||
worker2(w);
|
||||
#ifdef HAVE_PTHREAD
|
||||
} else {
|
||||
pthread_t *tid;
|
||||
tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t));
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
if (opt->flag&MEM_F_PE) {
|
||||
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t));
|
||||
else mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
}
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
|
||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||
free(tid);
|
||||
}
|
||||
#else
|
||||
worker1(w);
|
||||
if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes);
|
||||
worker2(w);
|
||||
#endif
|
||||
for (i = 0; i < n; ++i) {
|
||||
err_fputs(seqs[i].sam, stdout);
|
||||
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
||||
}
|
||||
free(regs); free(w);
|
||||
}
|
||||
|
|
|
|||
9
bwamem.h
9
bwamem.h
|
|
@ -57,8 +57,9 @@ typedef struct {
|
|||
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
|
||||
|
||||
typedef struct {
|
||||
int low, high, failed;
|
||||
double avg, std;
|
||||
int low, high; // lower and upper bounds within which a read pair is considered to be properly paired
|
||||
int failed; // non-zero if the orientation is not supported by sufficient data
|
||||
double avg, std; // mean and stddev of the insert size distribution
|
||||
} mem_pestat_t;
|
||||
|
||||
typedef struct { // This struct is only used for the convenience of API.
|
||||
|
|
@ -103,8 +104,10 @@ extern "C" {
|
|||
* @param pac 2-bit encoded reference
|
||||
* @param n number of query sequences
|
||||
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
|
||||
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
|
||||
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
|
||||
*/
|
||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);
|
||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
|
||||
|
||||
/**
|
||||
* Find the aligned regions for one query sequence
|
||||
|
|
|
|||
9
bwase.c
9
bwase.c
|
|
@ -181,6 +181,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
|||
rb = re - tle; rlen = tle;
|
||||
seq_reverse(len, seq, 0);
|
||||
seq_reverse(rlen, rseq, 0);
|
||||
if (rlen == 0) goto refine_gapped_err;
|
||||
ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32);
|
||||
if (qle < len) { // write soft clip
|
||||
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
|
||||
|
|
@ -195,6 +196,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
|||
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, >le, &gscore, 0);
|
||||
if (gscore > 0) tle = gtle, qle = len;
|
||||
re = rb + tle; rlen = tle;
|
||||
if (rlen == 0) goto refine_gapped_err;
|
||||
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
|
||||
if (qle < len) {
|
||||
cigar = xrealloc(cigar, (*n_cigar + 1) * 4);
|
||||
|
|
@ -209,6 +211,11 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
|||
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
||||
free(rseq);
|
||||
return cigar;
|
||||
|
||||
refine_gapped_err:
|
||||
free(rseq);
|
||||
*n_cigar = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
|
||||
|
|
@ -320,6 +327,7 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
|
|||
}
|
||||
if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
|
||||
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand);
|
||||
if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH;
|
||||
}
|
||||
// generate MD tag
|
||||
str = (kstring_t*)xcalloc(1, sizeof(kstring_t));
|
||||
|
|
@ -475,6 +483,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
|||
for (i = 0; i < p->n_multi; ++i) {
|
||||
bwt_multi1_t *q = p->multi + i;
|
||||
int k;
|
||||
if (q->cigar == 0) continue;
|
||||
j = pos_end_multi(q, p->len) - q->pos;
|
||||
nn = bns_cnt_ambi(bns, q->pos, j, &seqid);
|
||||
err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
|
||||
|
|
|
|||
16
fastmap.c
16
fastmap.c
|
|
@ -91,9 +91,12 @@ int main_mem(int argc, char *argv[])
|
|||
|
||||
bwa_fill_scmat(opt->a, opt->b, opt->mat);
|
||||
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
|
||||
bwa_print_sam_hdr(idx->bns, rg_line);
|
||||
|
||||
ko = kopen(argv[optind + 1], &fd);
|
||||
if (ko == 0) {
|
||||
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]);
|
||||
return 1;
|
||||
}
|
||||
fp = gzdopen(fd, "r");
|
||||
ks = kseq_init(fp);
|
||||
if (optind + 2 < argc) {
|
||||
|
|
@ -102,11 +105,16 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, "[W::%s] when '-p' is in use, the second query file will be ignored.\n", __func__);
|
||||
} else {
|
||||
ko2 = kopen(argv[optind + 2], &fd2);
|
||||
if (ko2 == 0) {
|
||||
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]);
|
||||
return 1;
|
||||
}
|
||||
fp2 = gzdopen(fd2, "r");
|
||||
ks2 = kseq_init(fp2);
|
||||
opt->flag |= MEM_F_PE;
|
||||
}
|
||||
}
|
||||
bwa_print_sam_hdr(idx->bns, rg_line);
|
||||
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
|
||||
int64_t size = 0;
|
||||
if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
|
||||
|
|
@ -121,7 +129,11 @@ int main_mem(int argc, char *argv[])
|
|||
for (i = 0; i < n; ++i) size += seqs[i].l_seq;
|
||||
if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
|
||||
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs);
|
||||
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0);
|
||||
for (i = 0; i < n; ++i) {
|
||||
err_fputs(seqs[i].sam, stdout);
|
||||
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
||||
}
|
||||
free(seqs);
|
||||
}
|
||||
|
||||
|
|
|
|||
4
kopen.c
4
kopen.c
|
|
@ -319,14 +319,14 @@ void *kopen(const char *fn, int *_fd)
|
|||
#else
|
||||
*_fd = open(fn, O_RDONLY);
|
||||
#endif
|
||||
if (*_fd) {
|
||||
if (*_fd >= 0) {
|
||||
aux = xcalloc(1, sizeof(koaux_t));
|
||||
aux->type = KO_FILE;
|
||||
aux->fd = *_fd;
|
||||
}
|
||||
}
|
||||
}
|
||||
*_fd = aux->fd;
|
||||
if (aux) *_fd = aux->fd;
|
||||
return aux;
|
||||
}
|
||||
|
||||
|
|
|
|||
4
main.c
4
main.c
|
|
@ -3,7 +3,7 @@
|
|||
#include "utils.h"
|
||||
|
||||
#ifndef PACKAGE_VERSION
|
||||
#define PACKAGE_VERSION "0.7.4-r385"
|
||||
#define PACKAGE_VERSION "0.7.4-r389-beta"
|
||||
#endif
|
||||
|
||||
int bwa_fa2pac(int argc, char *argv[]);
|
||||
|
|
@ -92,5 +92,5 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " %s", argv[i]);
|
||||
fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime());
|
||||
}
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue