r314: cleanup bwamem API

Don't modify input sequences; more documentations
This commit is contained in:
Heng Li 2013-03-01 11:14:51 -05:00
parent c5434ac865
commit 3e4a178e08
4 changed files with 37 additions and 14 deletions

View File

@ -715,20 +715,29 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
return regs;
}
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
{ // the difference from mem_align1_core() lies in that this routine calls mem_mark_primary_se()
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence
mem_alnreg_v ar;
char *seq;
seq = malloc(l_seq);
memcpy(seq, seq_, l_seq); // makes a copy of seq_
ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq);
mem_mark_primary_se(opt, ar.n, ar.a);
free(seq);
return ar;
}
// This routine is only used for the API purpose
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar)
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
{
mem_aln_t a;
int w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev;
int i, w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev;
int64_t pos, rb = ar->rb, re = ar->re;
uint8_t *query;
query = malloc(l_query);
for (i = 0; i < l_query; ++i) // convert to the nt4 encoding
query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
memset(&a, 0, sizeof(mem_aln_t));
a.mapq = mem_approx_mapq_se(opt, ar);
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
@ -752,6 +761,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
}
a.rid = bns_pos2rid(bns, pos);
a.pos = pos - bns->anns[a.rid].offset;
free(query);
return a;
}

View File

@ -64,11 +64,11 @@ typedef struct { // TODO: This is an intermediate struct only. Better get rid of
} bwahit_t;
typedef struct { // This struct is only used for the convenience of API.
int rid;
int pos;
uint32_t is_rev:1, mapq:8, NM:23;
int n_cigar;
uint32_t *cigar;
int rid; // reference sequence index in bntseq_t
int pos; // forward strand 5'-end mapping position
uint32_t is_rev:1, mapq:8, NM:23; // is_rev: whether on the reverse strand; mapq: mapping quality; NM: edit distance
int n_cigar; // number of CIGAR operations
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
} mem_aln_t;
#ifdef __cplusplus
@ -116,13 +116,26 @@ extern "C" {
* @param bns Information of the reference
* @param pac 2-bit encoded reference
* @param l_seq length of query sequence
* @param seq query sequence; conversion ACGTN/acgtn=>01234 to be applied
* @param seq query sequence
*
* @return list of aligned regions.
*/
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq);
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq);
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar);
/**
* Generate CIGAR and forward-strand position from alignment region
*
* @param opt alignment parameters
* @param bwt FM-index of the reference sequence
* @param bns Information of the reference
* @param pac 2-bit encoded reference
* @param l_seq length of query sequence
* @param seq query sequence
* @param ar one alignment region
*
* @return CIGAR, strand, mapping quality and forward-strand position
*/
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar);
/**
* Infer the insert size distribution from interleaved alignment regions

View File

@ -32,7 +32,7 @@ int main(int argc, char *argv[])
for (i = 0; i < ar.n; ++i) { // traverse each hit
mem_aln_t a;
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, (uint8_t*)ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
// print alignment
printf("%s\t%c\t%s\t%d\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, a.pos, a.mapq);
for (k = 0; k < a.n_cigar; ++k) // print CIGAR

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.0-r313"
#define PACKAGE_VERSION "0.7.0-r314"
#endif
static int usage()