r314: cleanup bwamem API
Don't modify input sequences; more documentations
This commit is contained in:
parent
c5434ac865
commit
3e4a178e08
18
bwamem.c
18
bwamem.c
|
|
@ -715,20 +715,29 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
|
||||||
return regs;
|
return regs;
|
||||||
}
|
}
|
||||||
|
|
||||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
|
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
|
||||||
{ // the difference from mem_align1_core() lies in that this routine calls mem_mark_primary_se()
|
{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence
|
||||||
mem_alnreg_v ar;
|
mem_alnreg_v ar;
|
||||||
|
char *seq;
|
||||||
|
seq = malloc(l_seq);
|
||||||
|
memcpy(seq, seq_, l_seq); // makes a copy of seq_
|
||||||
ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq);
|
ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq);
|
||||||
mem_mark_primary_se(opt, ar.n, ar.a);
|
mem_mark_primary_se(opt, ar.n, ar.a);
|
||||||
|
free(seq);
|
||||||
return ar;
|
return ar;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This routine is only used for the API purpose
|
// This routine is only used for the API purpose
|
||||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar)
|
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
|
||||||
{
|
{
|
||||||
mem_aln_t a;
|
mem_aln_t a;
|
||||||
int w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev;
|
int i, w2, qb = ar->qb, qe = ar->qe, NM, score, is_rev;
|
||||||
int64_t pos, rb = ar->rb, re = ar->re;
|
int64_t pos, rb = ar->rb, re = ar->re;
|
||||||
|
uint8_t *query;
|
||||||
|
|
||||||
|
query = malloc(l_query);
|
||||||
|
for (i = 0; i < l_query; ++i) // convert to the nt4 encoding
|
||||||
|
query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
|
||||||
memset(&a, 0, sizeof(mem_aln_t));
|
memset(&a, 0, sizeof(mem_aln_t));
|
||||||
a.mapq = mem_approx_mapq_se(opt, ar);
|
a.mapq = mem_approx_mapq_se(opt, ar);
|
||||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
||||||
|
|
@ -752,6 +761,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
|
||||||
}
|
}
|
||||||
a.rid = bns_pos2rid(bns, pos);
|
a.rid = bns_pos2rid(bns, pos);
|
||||||
a.pos = pos - bns->anns[a.rid].offset;
|
a.pos = pos - bns->anns[a.rid].offset;
|
||||||
|
free(query);
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
29
bwamem.h
29
bwamem.h
|
|
@ -64,11 +64,11 @@ typedef struct { // TODO: This is an intermediate struct only. Better get rid of
|
||||||
} bwahit_t;
|
} bwahit_t;
|
||||||
|
|
||||||
typedef struct { // This struct is only used for the convenience of API.
|
typedef struct { // This struct is only used for the convenience of API.
|
||||||
int rid;
|
int rid; // reference sequence index in bntseq_t
|
||||||
int pos;
|
int pos; // forward strand 5'-end mapping position
|
||||||
uint32_t is_rev:1, mapq:8, NM:23;
|
uint32_t is_rev:1, mapq:8, NM:23; // is_rev: whether on the reverse strand; mapq: mapping quality; NM: edit distance
|
||||||
int n_cigar;
|
int n_cigar; // number of CIGAR operations
|
||||||
uint32_t *cigar;
|
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
|
||||||
} mem_aln_t;
|
} mem_aln_t;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
@ -116,13 +116,26 @@ extern "C" {
|
||||||
* @param bns Information of the reference
|
* @param bns Information of the reference
|
||||||
* @param pac 2-bit encoded reference
|
* @param pac 2-bit encoded reference
|
||||||
* @param l_seq length of query sequence
|
* @param l_seq length of query sequence
|
||||||
* @param seq query sequence; conversion ACGTN/acgtn=>01234 to be applied
|
* @param seq query sequence
|
||||||
*
|
*
|
||||||
* @return list of aligned regions.
|
* @return list of aligned regions.
|
||||||
*/
|
*/
|
||||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq);
|
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq);
|
||||||
|
|
||||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, uint8_t *query, const mem_alnreg_t *ar);
|
/**
|
||||||
|
* Generate CIGAR and forward-strand position from alignment region
|
||||||
|
*
|
||||||
|
* @param opt alignment parameters
|
||||||
|
* @param bwt FM-index of the reference sequence
|
||||||
|
* @param bns Information of the reference
|
||||||
|
* @param pac 2-bit encoded reference
|
||||||
|
* @param l_seq length of query sequence
|
||||||
|
* @param seq query sequence
|
||||||
|
* @param ar one alignment region
|
||||||
|
*
|
||||||
|
* @return CIGAR, strand, mapping quality and forward-strand position
|
||||||
|
*/
|
||||||
|
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Infer the insert size distribution from interleaved alignment regions
|
* Infer the insert size distribution from interleaved alignment regions
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ int main(int argc, char *argv[])
|
||||||
for (i = 0; i < ar.n; ++i) { // traverse each hit
|
for (i = 0; i < ar.n; ++i) { // traverse each hit
|
||||||
mem_aln_t a;
|
mem_aln_t a;
|
||||||
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
||||||
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, (uint8_t*)ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
||||||
// print alignment
|
// print alignment
|
||||||
printf("%s\t%c\t%s\t%d\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, a.pos, a.mapq);
|
printf("%s\t%c\t%s\t%d\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, a.pos, a.mapq);
|
||||||
for (k = 0; k < a.n_cigar; ++k) // print CIGAR
|
for (k = 0; k < a.n_cigar; ++k) // print CIGAR
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue