diff --git a/bntseq.c b/bntseq.c index eddae84..548a7fd 100644 --- a/bntseq.c +++ b/bntseq.c @@ -94,7 +94,7 @@ void bns_dump(const bntseq_t *bns, const char *prefix) bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename) { - char str[1024]; + char str[8192]; FILE *fp; const char *fname; bntseq_t *bns; @@ -117,14 +117,14 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c if (scanres != 2) goto badread; p->name = strdup(str); // read fasta comments - while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; + while (q - str < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; while (c != '\n' && c != EOF) c = fgetc(fp); if (c == EOF) { scanres = EOF; goto badread; } *q = 0; - if (q - str > 1) p->anno = strdup(str + 1); // skip leading space + if (q - str > 1 && strcmp(str, " (null)") != 0) p->anno = strdup(str + 1); // skip leading space else p->anno = strdup(""); // read the rest scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs); diff --git a/bwamem.c b/bwamem.c index f8c5695..8377122 100644 --- a/bwamem.c +++ b/bwamem.c @@ -759,7 +759,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar) return l; } -void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all) +void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_) { int i, l_name; mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert @@ -788,7 +788,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m if (p->n_cigar) { // aligned for (i = 0; i < p->n_cigar; ++i) { int c = p->cigar[i]&0xf; - if (!softclip_all && (c == 3 || c == 4)) + if (!(opt->flag&MEM_F_SOFTCLIP) && (c == 3 || c == 4)) c = which? 4 : 3; // use hard clipping for supplementary alignments kputw(p->cigar[i]>>4, str); kputc("MIDSH"[c], str); } @@ -816,7 +816,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m kputsn("*\t*", 3, str); } else if (!p->is_rev) { // the forward strand int i, qb = 0, qe = s->l_seq; - if (p->n_cigar && which && !softclip_all) { // have cigar && not the primary alignment && not softclip all + if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) { // have cigar && not the primary alignment && not softclip all if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4; if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4; } @@ -830,7 +830,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m } else kputc('*', str); } else { // the reverse strand int i, qb = 0, qe = s->l_seq; - if (p->n_cigar && which && !softclip_all) { + if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) { if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4; if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4; } @@ -875,6 +875,14 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m } if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); } if (s->comment) { kputc('\t', str); kputs(s->comment, str); } + if ((opt->flag&MEM_F_REF_HDR) && p->rid >= 0 && bns->anns[p->rid].anno != 0 && bns->anns[p->rid].anno[0] != 0) { + int tmp; + kputsn("\tXR:Z:", 6, str); + tmp = str->l; + kputs(bns->anns[p->rid].anno, str); + for (i = tmp; i < str->l; ++i) // replace TAB in the comment to SPACE + if (str->s[i] == '\t') str->s[i] = ' '; + } kputc('\n', str); } @@ -941,10 +949,10 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa mem_aln_t t; t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0); t.flag |= extra_flag; - mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP); + mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m); } else { for (k = 0; k < aa.n; ++k) - mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP); + mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m); for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar); free(aa.a); } diff --git a/bwamem.h b/bwamem.h index 0628bfb..61bb335 100644 --- a/bwamem.h +++ b/bwamem.h @@ -18,6 +18,7 @@ typedef struct __smem_i smem_i; #define MEM_F_NO_RESCUE 0x20 #define MEM_F_SELF_OVLP 0x40 #define MEM_F_ALN_REG 0x80 +#define MEM_F_REF_HDR 0x100 #define MEM_F_SOFTCLIP 0x200 typedef struct { diff --git a/bwamem_pair.c b/bwamem_pair.c index a3aeb80..8fe2dd0 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -247,7 +247,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id); extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a); extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m); - extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all); + extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m); extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query); int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1; @@ -327,8 +327,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co // write SAM h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0; h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0; - mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP); s[0].sam = strdup(str.s); str.l = 0; - mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP); s[1].sam = str.s; + mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0; + mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s; if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); free(h[0].cigar); // h[0].XA will be freed in the following block free(h[1].cigar); diff --git a/fastmap.c b/fastmap.c index 4f5a07c..c141383 100644 --- a/fastmap.c +++ b/fastmap.c @@ -54,7 +54,7 @@ int main_mem(int argc, char *argv[]) opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:")) >= 0) { + while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == 'x') mode = optarg; else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1; @@ -71,6 +71,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP; else if (c == 'F') opt->flag |= MEM_F_ALN_REG; else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP; + else if (c == 'V') opt->flag |= MEM_F_REF_HDR; else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1; else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1; else if (c == 'v') bwa_verbose = atoi(optarg); @@ -163,6 +164,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -h INT if there are 80%% of the max score, output all in XA [%d]\n", opt->max_hits); fprintf(stderr, " -a output all alignments for SE or unpaired PE\n"); fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n"); + fprintf(stderr, " -V output the reference FASTA header in the XR tag\n"); fprintf(stderr, " -Y use soft clipping for supplementary alignments\n"); fprintf(stderr, " -M mark shorter split hits as secondary\n\n"); fprintf(stderr, " -I FLOAT[,FLOAT[,INT[,INT]]]\n"); diff --git a/main.c b/main.c index 4001a4f..f5e65c1 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r810-dirty" +#define PACKAGE_VERSION "0.7.10-r815-dirty" #endif int bwa_fa2pac(int argc, char *argv[]);