added a bit documentation
This commit is contained in:
parent
ee59a13109
commit
6bdccf2a8a
1
bwa.h
1
bwa.h
|
|
@ -34,6 +34,7 @@ extern "C" {
|
||||||
|
|
||||||
char *bwa_idx_infer_prefix(const char *hint);
|
char *bwa_idx_infer_prefix(const char *hint);
|
||||||
bwt_t *bwa_idx_load_bwt(const char *hint);
|
bwt_t *bwa_idx_load_bwt(const char *hint);
|
||||||
|
|
||||||
bwaidx_t *bwa_idx_load(const char *hint, int which);
|
bwaidx_t *bwa_idx_load(const char *hint, int which);
|
||||||
void bwa_idx_destroy(bwaidx_t *idx);
|
void bwa_idx_destroy(bwaidx_t *idx);
|
||||||
|
|
||||||
|
|
|
||||||
16
bwamem.c
16
bwamem.c
|
|
@ -6,6 +6,7 @@
|
||||||
#ifdef HAVE_PTHREAD
|
#ifdef HAVE_PTHREAD
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
|
|
@ -632,19 +633,19 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
|
||||||
s->sam = str.s;
|
s->sam = str.s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static mem_alnreg_v find_alnreg(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s)
|
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
mem_chain_v chn;
|
mem_chain_v chn;
|
||||||
mem_alnreg_v regs, tmp;
|
mem_alnreg_v regs, tmp;
|
||||||
for (i = 0; i < s->l_seq; ++i)
|
for (i = 0; i < l_seq; ++i)
|
||||||
s->seq[i] = nst_nt4_table[(int)s->seq[i]];
|
seq[i] = seq[i] < 4? seq[i] : nst_nt4_table[(int)seq[i]];
|
||||||
chn = mem_chain(opt, bwt, s->l_seq, (uint8_t*)s->seq);
|
chn = mem_chain(opt, bwt, l_seq, (uint8_t*)seq);
|
||||||
chn.n = mem_chain_flt(opt, chn.n, chn.a);
|
chn.n = mem_chain_flt(opt, chn.n, chn.a);
|
||||||
if (bwa_verbose >= 4) mem_print_chain(bns, &chn);
|
if (bwa_verbose >= 4) mem_print_chain(bns, &chn);
|
||||||
kv_init(regs); kv_init(tmp);
|
kv_init(regs); kv_init(tmp);
|
||||||
for (i = 0; i < chn.n; ++i) {
|
for (i = 0; i < chn.n; ++i) {
|
||||||
mem_chain2aln(opt, bns->l_pac, pac, s->l_seq, (uint8_t*)s->seq, &chn.a[i], &tmp);
|
mem_chain2aln(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, &chn.a[i], &tmp);
|
||||||
for (j = 0; j < tmp.n; ++j)
|
for (j = 0; j < tmp.n; ++j)
|
||||||
kv_push(mem_alnreg_t, regs, tmp.a[j]);
|
kv_push(mem_alnreg_t, regs, tmp.a[j]);
|
||||||
free(chn.a[i].seeds);
|
free(chn.a[i].seeds);
|
||||||
|
|
@ -670,7 +671,7 @@ static void *worker1(void *data)
|
||||||
worker_t *w = (worker_t*)data;
|
worker_t *w = (worker_t*)data;
|
||||||
int i;
|
int i;
|
||||||
for (i = w->start; i < w->n; i += w->step)
|
for (i = w->start; i < w->n; i += w->step)
|
||||||
w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
|
w->regs[i] = mem_align1(w->opt, w->bwt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -696,7 +697,7 @@ static void *worker2(void *data)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs)
|
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
worker_t *w;
|
worker_t *w;
|
||||||
|
|
@ -737,5 +738,4 @@ int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns
|
||||||
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
||||||
}
|
}
|
||||||
free(regs); free(w);
|
free(regs); free(w);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
61
bwamem.h
61
bwamem.h
|
|
@ -31,10 +31,14 @@ typedef struct {
|
||||||
} mem_opt_t;
|
} mem_opt_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int64_t rb, re;
|
int64_t rb, re; // [rb,re): reference sequence in the alignment
|
||||||
int score, qb, qe, seedcov, sub, csub; // sub: suboptimal score; csub: suboptimal inside the chain
|
int qb, qe; // [qb,qe): query sequence in the alignment
|
||||||
|
int score; // best SW score
|
||||||
|
int sub; // 2nd best SW score
|
||||||
|
int csub; // SW score of a tandem hit
|
||||||
int sub_n; // approximate number of suboptimal hits
|
int sub_n; // approximate number of suboptimal hits
|
||||||
int secondary; // non-negative if the hit is secondary
|
int seedcov; // length of regions coverged by seeds
|
||||||
|
int secondary; // index of the parent hit shadowing the current hit; <0 if primary
|
||||||
} mem_alnreg_t;
|
} mem_alnreg_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -63,8 +67,57 @@ extern "C" {
|
||||||
mem_opt_t *mem_opt_init(void);
|
mem_opt_t *mem_opt_init(void);
|
||||||
void mem_fill_scmat(int a, int b, int8_t mat[25]);
|
void mem_fill_scmat(int a, int b, int8_t mat[25]);
|
||||||
|
|
||||||
int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);
|
/**
|
||||||
|
* Align a batch of sequences and generate the alignments in the SAM format
|
||||||
|
*
|
||||||
|
* This routine requires $seqs[i].{l_seq,seq,name} and write $seqs[i].sam.
|
||||||
|
* Note that $seqs[i].sam may consist of several SAM lines if the
|
||||||
|
* corresponding sequence has multiple primary hits.
|
||||||
|
*
|
||||||
|
* In the paired-end mode (i.e. MEM_F_PE is set in $opt->flag), query
|
||||||
|
* sequences must be interleaved: $n must be an even number and the 2i-th
|
||||||
|
* sequence and the (2i+1)-th sequence constitute a read pair. In this
|
||||||
|
* mode, there should be enough (typically >50) unique pairs for the
|
||||||
|
* routine to infer the orientation and insert size.
|
||||||
|
*
|
||||||
|
* @param opt alignment parameters
|
||||||
|
* @param bwt FM-index of the reference sequence
|
||||||
|
* @param bns Information of the reference
|
||||||
|
* @param pac 2-bit encoded reference
|
||||||
|
* @param n number of query sequences
|
||||||
|
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
|
||||||
|
*/
|
||||||
|
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the aligned regions for one query sequence
|
||||||
|
*
|
||||||
|
* Note that this routine does not generate CIGAR. CIGAR should be
|
||||||
|
* generated later by bwa_gen_cigar() defined in bwa.c.
|
||||||
|
*
|
||||||
|
* @param opt alignment parameters
|
||||||
|
* @param bwt FM-index of the reference sequence
|
||||||
|
* @param bns Information of the reference
|
||||||
|
* @param pac 2-bit encoded reference
|
||||||
|
* @param l_seq length of query sequence
|
||||||
|
* @param seq query sequence; conversion ACGTN/acgtn=>01234 to be applied
|
||||||
|
*
|
||||||
|
* @return list of aligned regions.
|
||||||
|
*/
|
||||||
|
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Infer the insert size distribution from interleaved alignment regions
|
||||||
|
*
|
||||||
|
* This function can be called after mem_align1(), as long as paired-end
|
||||||
|
* reads are properly interleaved.
|
||||||
|
*
|
||||||
|
* @param opt alignment parameters
|
||||||
|
* @param l_pac length of concatenated reference sequence
|
||||||
|
* @param n number of query sequences; must be an even number
|
||||||
|
* @param regs region array of size $n; 2i-th and (2i+1)-th elements constitute a pair
|
||||||
|
* @param pes inferred insert size distribution (output)
|
||||||
|
*/
|
||||||
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]);
|
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,7 @@ int main_mem(int argc, char *argv[])
|
||||||
ks2 = kseq_init(fp2);
|
ks2 = kseq_init(fp2);
|
||||||
opt->flag |= MEM_F_PE;
|
opt->flag |= MEM_F_PE;
|
||||||
}
|
}
|
||||||
while ((seqs = bseq_read(opt->chunk_size, &n, ks, ks2)) != 0) {
|
while ((seqs = bseq_read(opt->chunk_size * (ko2? 2 : 1), &n, ks, ks2)) != 0) {
|
||||||
int64_t size = 0;
|
int64_t size = 0;
|
||||||
if (!copy_comment)
|
if (!copy_comment)
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue