r308: added a new API to convert region to CIGAR
and an example program demonstrating how to do single-end alignment in <50 lines of C code.
This commit is contained in:
parent
64d92d26df
commit
df7c3f0000
5
Makefile
5
Makefile
|
|
@ -8,7 +8,7 @@ AOBJS= QSufSort.o bwt_gen.o stdaln.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamli
|
|||
is.o bwtindex.o bwape.o \
|
||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
||||
PROG= bwa
|
||||
PROG= bwa bwamem-lite
|
||||
INCLUDES=
|
||||
LIBS= -lm -lz -lpthread
|
||||
SUBDIRS= .
|
||||
|
|
@ -25,6 +25,9 @@ all:$(PROG)
|
|||
bwa:libbwa.a $(AOBJS) main.o
|
||||
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ $(LIBS) -L. -lbwa
|
||||
|
||||
bwamem-lite:libbwa.a example.o
|
||||
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ $(LIBS) -L. -lbwa
|
||||
|
||||
libbwa.a:$(LOBJS)
|
||||
$(AR) -csru $@ $(LOBJS)
|
||||
|
||||
|
|
|
|||
34
bwamem.c
34
bwamem.c
|
|
@ -679,7 +679,7 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
|
|||
s->sam = str.s;
|
||||
}
|
||||
|
||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
|
||||
mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
|
||||
{
|
||||
int i;
|
||||
mem_chain_v chn;
|
||||
|
|
@ -703,6 +703,32 @@ mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *
|
|||
return regs;
|
||||
}
|
||||
|
||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
|
||||
{ // the difference from mem_align1_core() lies in that this routine calls mem_mark_primary_se()
|
||||
mem_alnreg_v ar;
|
||||
ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq);
|
||||
mem_mark_primary_se(opt, ar.n, ar.a);
|
||||
return ar;
|
||||
}
|
||||
|
||||
// This routine is only used for the API purpose
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, const mem_alnreg_t *ar)
|
||||
{
|
||||
mem_aln_t a;
|
||||
int qb = ar->qb, qe = ar->qe, NM, score, is_rev;
|
||||
int64_t pos, rb = ar->rb, re = ar->re;
|
||||
memset(&a, 0, sizeof(mem_aln_t));
|
||||
a.mapq = mem_approx_mapq_se(opt, ar);
|
||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
||||
a.cigar = bwa_gen_cigar(opt->mat, opt->q, opt->r, opt->w, bns->l_pac, pac, qe - qb, (uint8_t*)&query[qb], rb, re, &score, &a.n_cigar, &NM);
|
||||
a.NM = NM;
|
||||
pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
|
||||
a.is_rev = is_rev;
|
||||
a.rid = bns_pos2rid(bns, pos);
|
||||
a.pos = pos - bns->anns[a.rid].offset;
|
||||
return a;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
int start, step, n;
|
||||
const mem_opt_t *opt;
|
||||
|
|
@ -720,11 +746,11 @@ static void *worker1(void *data)
|
|||
int i;
|
||||
if (!(w->opt->flag&MEM_F_PE)) {
|
||||
for (i = w->start; i < w->n; i += w->step)
|
||||
w->regs[i] = mem_align1(w->opt, w->bwt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq);
|
||||
w->regs[i] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq);
|
||||
} else { // for PE we align the two ends in the same thread in case the 2nd read is of worse quality, in which case some threads may be faster/slower
|
||||
for (i = w->start; i < w->n>>1; i += w->step) {
|
||||
w->regs[i<<1|0] = mem_align1(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq);
|
||||
w->regs[i<<1|1] = mem_align1(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq);
|
||||
w->regs[i<<1|0] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq);
|
||||
w->regs[i<<1|1] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
|
|
|||
14
bwamem.h
14
bwamem.h
|
|
@ -49,19 +49,27 @@ typedef struct {
|
|||
int secondary; // index of the parent hit shadowing the current hit; <0 if primary
|
||||
} mem_alnreg_t;
|
||||
|
||||
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
|
||||
|
||||
typedef struct {
|
||||
int low, high, failed;
|
||||
double avg, std;
|
||||
} mem_pestat_t;
|
||||
|
||||
typedef struct {
|
||||
typedef struct { // TODO: This is an intermediate struct only. Better get rid of it.
|
||||
int64_t rb, re;
|
||||
int qb, qe, flag, qual;
|
||||
// optional info
|
||||
int score, sub;
|
||||
} bwahit_t;
|
||||
|
||||
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
|
||||
typedef struct { // This struct is only used for the convenience of API.
|
||||
int rid;
|
||||
int pos;
|
||||
uint32_t is_rev:1, mapq:8, NM:23;
|
||||
int n_cigar;
|
||||
uint32_t *cigar;
|
||||
} mem_aln_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -114,6 +122,8 @@ extern "C" {
|
|||
*/
|
||||
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq);
|
||||
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, const mem_alnreg_t *ar);
|
||||
|
||||
/**
|
||||
* Infer the insert size distribution from interleaved alignment regions
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
#include <stdio.h>
|
||||
#include <zlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "bwamem.h"
|
||||
#include "kseq.h" // for the FASTA/Q parser
|
||||
KSEQ_DECLARE(gzFile)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
bwaidx_t *idx;
|
||||
gzFile fp;
|
||||
kseq_t *ks;
|
||||
mem_opt_t *opt;
|
||||
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "Usage: bwamem-lite <idx.base> <reads.fq>\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
idx = bwa_idx_load(argv[1], BWA_IDX_ALL); // load the BWA index
|
||||
assert(idx);
|
||||
fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
|
||||
assert(fp);
|
||||
ks = kseq_init(fp); // initialize the FASTA/Q parser
|
||||
opt = mem_opt_init(); // initialize the BWA-MEM parameters to the default values
|
||||
|
||||
while (kseq_read(ks) >= 0) { // read one sequence
|
||||
mem_alnreg_v ar;
|
||||
int i, k;
|
||||
ar = mem_align1(opt, idx->bwt, idx->bns, idx->pac, ks->seq.l, ks->seq.s); // get all the hits
|
||||
for (i = 0; i < ar.n; ++i) { // traverse each hit
|
||||
mem_aln_t a;
|
||||
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
||||
a = mem_reg2aln(opt, idx->bns, idx->pac, (uint8_t*)ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
||||
printf("%s\t%c\t%s\t%d\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, a.pos, a.mapq);
|
||||
for (k = 0; k < a.n_cigar; ++k)
|
||||
printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]);
|
||||
printf("\t%d\n", a.NM);
|
||||
free(a.cigar); // don't forget to deallocate CIGAR
|
||||
}
|
||||
free(ar.a); // and deallocate the hit list
|
||||
}
|
||||
|
||||
free(opt);
|
||||
kseq_destroy(ks);
|
||||
gzclose(fp);
|
||||
bwa_idx_destroy(idx);
|
||||
return 0;
|
||||
}
|
||||
Loading…
Reference in New Issue