diff --git a/bwamem.c b/bwamem.c index 52dc7fb..7efbb8d 100644 --- a/bwamem.c +++ b/bwamem.c @@ -715,20 +715,29 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse return regs; } -mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq) -{ // the difference from mem_align1_core() lies in that this routine calls mem_mark_primary_se() +mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_) +{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence mem_alnreg_v ar; + char *seq; + seq = malloc(l_seq); + memcpy(seq, seq_, l_seq); // makes a copy of seq_ ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq); mem_mark_primary_se(opt, ar.n, ar.a); + free(seq); return ar; } // This routine is only used for the API purpose -mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar) +mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar) { mem_aln_t a; - int w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev; + int i, w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev; int64_t pos, rb = ar->rb, re = ar->re; + uint8_t *query; + + query = malloc(l_query); + for (i = 0; i < l_query; ++i) // convert to the nt4 encoding + query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]]; memset(&a, 0, sizeof(mem_aln_t)); a.mapq = mem_approx_mapq_se(opt, ar); bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re); @@ -752,6 +761,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t * } a.rid = bns_pos2rid(bns, pos); a.pos = pos - bns->anns[a.rid].offset; + free(query); return a; } diff --git a/bwamem.h b/bwamem.h index c2f124c..a856fa5 100644 --- a/bwamem.h +++ b/bwamem.h @@ -64,11 +64,11 @@ typedef struct { // TODO: This is an intermediate struct only. Better get rid of } bwahit_t; typedef struct { // This struct is only used for the convenience of API. - int rid; - int pos; - uint32_t is_rev:1, mapq:8, NM:23; - int n_cigar; - uint32_t *cigar; + int rid; // reference sequence index in bntseq_t + int pos; // forward strand 5'-end mapping position + uint32_t is_rev:1, mapq:8, NM:23; // is_rev: whether on the reverse strand; mapq: mapping quality; NM: edit distance + int n_cigar; // number of CIGAR operations + uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234 } mem_aln_t; #ifdef __cplusplus @@ -116,13 +116,26 @@ extern "C" { * @param bns Information of the reference * @param pac 2-bit encoded reference * @param l_seq length of query sequence - * @param seq query sequence; conversion ACGTN/acgtn=>01234 to be applied + * @param seq query sequence * * @return list of aligned regions. */ - mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq); + mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq); - mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar); + /** + * Generate CIGAR and forward-strand position from alignment region + * + * @param opt alignment parameters + * @param bwt FM-index of the reference sequence + * @param bns Information of the reference + * @param pac 2-bit encoded reference + * @param l_seq length of query sequence + * @param seq query sequence + * @param ar one alignment region + * + * @return CIGAR, strand, mapping quality and forward-strand position + */ + mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar); /** * Infer the insert size distribution from interleaved alignment regions diff --git a/example.c b/example.c index 6564cbd..b59eec2 100644 --- a/example.c +++ b/example.c @@ -32,7 +32,7 @@ int main(int argc, char *argv[]) for (i = 0; i < ar.n; ++i) { // traverse each hit mem_aln_t a; if (ar.a[i].secondary >= 0) continue; // skip secondary alignments - a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, (uint8_t*)ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR + a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR // print alignment printf("%s\t%c\t%s\t%d\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, a.pos, a.mapq); for (k = 0; k < a.n_cigar; ++k) // print CIGAR diff --git a/main.c b/main.c index ba60cf7..fee5bd6 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.0-r313" +#define PACKAGE_VERSION "0.7.0-r314" #endif static int usage()