r314: cleanup bwamem API
Don't modify input sequences; more documentations
This commit is contained in:
parent
c5434ac865
commit
3e4a178e08
18
bwamem.c
18
bwamem.c
|
|
@ -715,20 +715,29 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
|
|||
return regs;
|
||||
}
|
||||
|
||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
|
||||
{ // the difference from mem_align1_core() lies in that this routine calls mem_mark_primary_se()
|
||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
|
||||
{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence
|
||||
mem_alnreg_v ar;
|
||||
char *seq;
|
||||
seq = malloc(l_seq);
|
||||
memcpy(seq, seq_, l_seq); // makes a copy of seq_
|
||||
ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq);
|
||||
mem_mark_primary_se(opt, ar.n, ar.a);
|
||||
free(seq);
|
||||
return ar;
|
||||
}
|
||||
|
||||
// This routine is only used for the API purpose
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar)
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
|
||||
{
|
||||
mem_aln_t a;
|
||||
int w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev;
|
||||
int i, w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev;
|
||||
int64_t pos, rb = ar->rb, re = ar->re;
|
||||
uint8_t *query;
|
||||
|
||||
query = malloc(l_query);
|
||||
for (i = 0; i < l_query; ++i) // convert to the nt4 encoding
|
||||
query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
|
||||
memset(&a, 0, sizeof(mem_aln_t));
|
||||
a.mapq = mem_approx_mapq_se(opt, ar);
|
||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
||||
|
|
@ -752,6 +761,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
|
|||
}
|
||||
a.rid = bns_pos2rid(bns, pos);
|
||||
a.pos = pos - bns->anns[a.rid].offset;
|
||||
free(query);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
|
|
|||
29
bwamem.h
29
bwamem.h
|
|
@ -64,11 +64,11 @@ typedef struct { // TODO: This is an intermediate struct only. Better get rid of
|
|||
} bwahit_t;
|
||||
|
||||
typedef struct { // This struct is only used for the convenience of API.
|
||||
int rid;
|
||||
int pos;
|
||||
uint32_t is_rev:1, mapq:8, NM:23;
|
||||
int n_cigar;
|
||||
uint32_t *cigar;
|
||||
int rid; // reference sequence index in bntseq_t
|
||||
int pos; // forward strand 5'-end mapping position
|
||||
uint32_t is_rev:1, mapq:8, NM:23; // is_rev: whether on the reverse strand; mapq: mapping quality; NM: edit distance
|
||||
int n_cigar; // number of CIGAR operations
|
||||
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
|
||||
} mem_aln_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
@ -116,13 +116,26 @@ extern "C" {
|
|||
* @param bns Information of the reference
|
||||
* @param pac 2-bit encoded reference
|
||||
* @param l_seq length of query sequence
|
||||
* @param seq query sequence; conversion ACGTN/acgtn=>01234 to be applied
|
||||
* @param seq query sequence
|
||||
*
|
||||
* @return list of aligned regions.
|
||||
*/
|
||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq);
|
||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq);
|
||||
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar);
|
||||
/**
|
||||
* Generate CIGAR and forward-strand position from alignment region
|
||||
*
|
||||
* @param opt alignment parameters
|
||||
* @param bwt FM-index of the reference sequence
|
||||
* @param bns Information of the reference
|
||||
* @param pac 2-bit encoded reference
|
||||
* @param l_seq length of query sequence
|
||||
* @param seq query sequence
|
||||
* @param ar one alignment region
|
||||
*
|
||||
* @return CIGAR, strand, mapping quality and forward-strand position
|
||||
*/
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar);
|
||||
|
||||
/**
|
||||
* Infer the insert size distribution from interleaved alignment regions
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ int main(int argc, char *argv[])
|
|||
for (i = 0; i < ar.n; ++i) { // traverse each hit
|
||||
mem_aln_t a;
|
||||
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
||||
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, (uint8_t*)ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
||||
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
||||
// print alignment
|
||||
printf("%s\t%c\t%s\t%d\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, a.pos, a.mapq);
|
||||
for (k = 0; k < a.n_cigar; ++k) // print CIGAR
|
||||
|
|
|
|||
Loading…
Reference in New Issue