r815: optionally output ref fasta header
Also fixed a bug in reading .ann files
This commit is contained in:
parent
1e611b235c
commit
35ac99b4f7
6
bntseq.c
6
bntseq.c
|
|
@ -94,7 +94,7 @@ void bns_dump(const bntseq_t *bns, const char *prefix)
|
|||
|
||||
bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename)
|
||||
{
|
||||
char str[1024];
|
||||
char str[8192];
|
||||
FILE *fp;
|
||||
const char *fname;
|
||||
bntseq_t *bns;
|
||||
|
|
@ -117,14 +117,14 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
|
|||
if (scanres != 2) goto badread;
|
||||
p->name = strdup(str);
|
||||
// read fasta comments
|
||||
while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
|
||||
while (q - str < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
|
||||
while (c != '\n' && c != EOF) c = fgetc(fp);
|
||||
if (c == EOF) {
|
||||
scanres = EOF;
|
||||
goto badread;
|
||||
}
|
||||
*q = 0;
|
||||
if (q - str > 1) p->anno = strdup(str + 1); // skip leading space
|
||||
if (q - str > 1 && strcmp(str, " (null)") != 0) p->anno = strdup(str + 1); // skip leading space
|
||||
else p->anno = strdup("");
|
||||
// read the rest
|
||||
scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs);
|
||||
|
|
|
|||
20
bwamem.c
20
bwamem.c
|
|
@ -759,7 +759,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar)
|
|||
return l;
|
||||
}
|
||||
|
||||
void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all)
|
||||
void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_)
|
||||
{
|
||||
int i, l_name;
|
||||
mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
|
||||
|
|
@ -788,7 +788,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
|||
if (p->n_cigar) { // aligned
|
||||
for (i = 0; i < p->n_cigar; ++i) {
|
||||
int c = p->cigar[i]&0xf;
|
||||
if (!softclip_all && (c == 3 || c == 4))
|
||||
if (!(opt->flag&MEM_F_SOFTCLIP) && (c == 3 || c == 4))
|
||||
c = which? 4 : 3; // use hard clipping for supplementary alignments
|
||||
kputw(p->cigar[i]>>4, str); kputc("MIDSH"[c], str);
|
||||
}
|
||||
|
|
@ -816,7 +816,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
|||
kputsn("*\t*", 3, str);
|
||||
} else if (!p->is_rev) { // the forward strand
|
||||
int i, qb = 0, qe = s->l_seq;
|
||||
if (p->n_cigar && which && !softclip_all) { // have cigar && not the primary alignment && not softclip all
|
||||
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) { // have cigar && not the primary alignment && not softclip all
|
||||
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4;
|
||||
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4;
|
||||
}
|
||||
|
|
@ -830,7 +830,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
|||
} else kputc('*', str);
|
||||
} else { // the reverse strand
|
||||
int i, qb = 0, qe = s->l_seq;
|
||||
if (p->n_cigar && which && !softclip_all) {
|
||||
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) {
|
||||
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4;
|
||||
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4;
|
||||
}
|
||||
|
|
@ -875,6 +875,14 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
|||
}
|
||||
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
|
||||
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
|
||||
if ((opt->flag&MEM_F_REF_HDR) && p->rid >= 0 && bns->anns[p->rid].anno != 0 && bns->anns[p->rid].anno[0] != 0) {
|
||||
int tmp;
|
||||
kputsn("\tXR:Z:", 6, str);
|
||||
tmp = str->l;
|
||||
kputs(bns->anns[p->rid].anno, str);
|
||||
for (i = tmp; i < str->l; ++i) // replace TAB in the comment to SPACE
|
||||
if (str->s[i] == '\t') str->s[i] = ' ';
|
||||
}
|
||||
kputc('\n', str);
|
||||
}
|
||||
|
||||
|
|
@ -941,10 +949,10 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
|
|||
mem_aln_t t;
|
||||
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
|
||||
t.flag |= extra_flag;
|
||||
mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP);
|
||||
mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m);
|
||||
} else {
|
||||
for (k = 0; k < aa.n; ++k)
|
||||
mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP);
|
||||
mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m);
|
||||
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
|
||||
free(aa.a);
|
||||
}
|
||||
|
|
|
|||
1
bwamem.h
1
bwamem.h
|
|
@ -18,6 +18,7 @@ typedef struct __smem_i smem_i;
|
|||
#define MEM_F_NO_RESCUE 0x20
|
||||
#define MEM_F_SELF_OVLP 0x40
|
||||
#define MEM_F_ALN_REG 0x80
|
||||
#define MEM_F_REF_HDR 0x100
|
||||
#define MEM_F_SOFTCLIP 0x200
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
|
|
@ -247,7 +247,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
|
||||
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
||||
extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
|
||||
extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all);
|
||||
extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
|
||||
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);
|
||||
|
||||
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
|
||||
|
|
@ -327,8 +327,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
// write SAM
|
||||
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
|
||||
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
|
||||
mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP); s[0].sam = strdup(str.s); str.l = 0;
|
||||
mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP); s[1].sam = str.s;
|
||||
mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0;
|
||||
mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s;
|
||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
free(h[0].cigar); // h[0].XA will be freed in the following block
|
||||
free(h[1].cigar);
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ int main_mem(int argc, char *argv[])
|
|||
|
||||
opt = mem_opt_init();
|
||||
memset(&opt0, 0, sizeof(mem_opt_t));
|
||||
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
||||
else if (c == 'x') mode = optarg;
|
||||
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
||||
|
|
@ -71,6 +71,7 @@ int main_mem(int argc, char *argv[])
|
|||
else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP;
|
||||
else if (c == 'F') opt->flag |= MEM_F_ALN_REG;
|
||||
else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP;
|
||||
else if (c == 'V') opt->flag |= MEM_F_REF_HDR;
|
||||
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
|
||||
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
|
||||
else if (c == 'v') bwa_verbose = atoi(optarg);
|
||||
|
|
@ -163,6 +164,7 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, " -h INT if there are <INT hits with score >80%% of the max score, output all in XA [%d]\n", opt->max_hits);
|
||||
fprintf(stderr, " -a output all alignments for SE or unpaired PE\n");
|
||||
fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n");
|
||||
fprintf(stderr, " -V output the reference FASTA header in the XR tag\n");
|
||||
fprintf(stderr, " -Y use soft clipping for supplementary alignments\n");
|
||||
fprintf(stderr, " -M mark shorter split hits as secondary\n\n");
|
||||
fprintf(stderr, " -I FLOAT[,FLOAT[,INT[,INT]]]\n");
|
||||
|
|
|
|||
Loading…
Reference in New Issue