Merge branch 'nh_bam_support' of git://github.com/nh13/bwa into nh13-nh_bam_support
This commit is contained in:
commit
8a230db596
20
Makefile
20
Makefile
|
|
@ -2,6 +2,8 @@ CC= gcc
|
||||||
#CC= clang --analyze
|
#CC= clang --analyze
|
||||||
CFLAGS= -g -Wall -Wno-unused-function -O2
|
CFLAGS= -g -Wall -Wno-unused-function -O2
|
||||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||||
|
# set to 1 if you wish to have bam support, type 'make clean; make all'
|
||||||
|
USE_HTSLIB=0
|
||||||
AR= ar
|
AR= ar
|
||||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||||
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
|
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
|
||||||
|
|
@ -17,15 +19,27 @@ SUBDIRS= .
|
||||||
.SUFFIXES:.c .o .cc
|
.SUFFIXES:.c .o .cc
|
||||||
|
|
||||||
.c.o:
|
.c.o:
|
||||||
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
|
ifeq ($(USE_HTSLIB),1)
|
||||||
|
$(CC) -c $(CFLAGS) $(DFLAGS) -DUSE_HTSLIB $(INCLUDES) -I ../htslib $< -o $@
|
||||||
|
else
|
||||||
|
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
all:$(PROG)
|
all:$(PROG)
|
||||||
|
|
||||||
bwa:libbwa.a $(AOBJS) main.o
|
bwa:libbwa.a $(AOBJS) main.o
|
||||||
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
|
ifeq ($(USE_HTSLIB),1)
|
||||||
|
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ ../htslib/libhts.a -L. -L../htslib -lbwa $(LIBS)
|
||||||
|
else
|
||||||
|
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
|
||||||
|
endif
|
||||||
|
|
||||||
bwamem-lite:libbwa.a example.o
|
bwamem-lite:libbwa.a example.o
|
||||||
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)
|
ifeq ($(USE_HTSLIB),1)
|
||||||
|
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ ../htslib/libhts.a -L. -L../htslib -lbwa $(LIBS)
|
||||||
|
else
|
||||||
|
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)
|
||||||
|
endif
|
||||||
|
|
||||||
libbwa.a:$(LOBJS)
|
libbwa.a:$(LOBJS)
|
||||||
$(AR) -csru $@ $(LOBJS)
|
$(AR) -csru $@ $(LOBJS)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#ifndef USE_HTSLIB
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
@ -208,3 +209,4 @@ int bamlite_gzclose(gzFile file) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
|
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
|
||||||
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
#ifndef BAMLITE_H_
|
#ifndef BAMLITE_H_
|
||||||
#define BAMLITE_H_
|
#define BAMLITE_H_
|
||||||
|
#ifndef USE_HTSLIB
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
|
@ -112,3 +113,4 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
|
||||||
39
bwa.c
39
bwa.c
|
|
@ -7,6 +7,9 @@
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
#include <htslib/sam.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef USE_MALLOC_WRAPPERS
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
# include "malloc_wrap.h"
|
# include "malloc_wrap.h"
|
||||||
|
|
@ -274,6 +277,19 @@ void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line)
|
||||||
err_printf("%s\n", bwa_pg);
|
err_printf("%s\n", bwa_pg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
void bwa_format_sam_hdr(const bntseq_t *bns, const char *rg_line, kstring_t *str)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
extern char *bwa_pg;
|
||||||
|
str->l = 0; str->s = 0;
|
||||||
|
for (i = 0; i < bns->n_seqs; ++i)
|
||||||
|
ksprintf(str, "@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len);
|
||||||
|
if (rg_line) ksprintf(str, "%s\n", rg_line);
|
||||||
|
ksprintf(str, "%s\n", bwa_pg);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static char *bwa_escape(char *s)
|
static char *bwa_escape(char *s)
|
||||||
{
|
{
|
||||||
char *p, *q;
|
char *p, *q;
|
||||||
|
|
@ -319,3 +335,26 @@ err_set_rg:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
bams_t *bams_init() {
|
||||||
|
return calloc(1, sizeof(bams_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
void bams_add(bams_t *bams, bam1_t *b) {
|
||||||
|
if (bams->m == bams->l) {
|
||||||
|
bams->m = bams->m << 2;
|
||||||
|
bams->bams = realloc(bams->bams, sizeof(bam1_t) * bams->m);
|
||||||
|
}
|
||||||
|
bams->bams[bams->l] = b;
|
||||||
|
bams->l++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bams_destroy(bams_t *bams) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < bams->l; i++) {
|
||||||
|
bam_destroy1(bams->bams[i]);
|
||||||
|
}
|
||||||
|
free(bams->bams);
|
||||||
|
free(bams);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
||||||
32
bwa.h
32
bwa.h
|
|
@ -4,6 +4,9 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
#include "bwt.h"
|
#include "bwt.h"
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
#include <htslib/sam.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define BWA_IDX_BWT 0x1
|
#define BWA_IDX_BWT 0x1
|
||||||
#define BWA_IDX_BNS 0x2
|
#define BWA_IDX_BNS 0x2
|
||||||
|
|
@ -16,11 +19,31 @@ typedef struct {
|
||||||
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
||||||
} bwaidx_t;
|
} bwaidx_t;
|
||||||
|
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
typedef struct {
|
||||||
|
int l, m;
|
||||||
|
bam1_t **bams;
|
||||||
|
} bams_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int l_seq;
|
int l_seq;
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
char *name, *comment, *seq, *qual;
|
||||||
|
bams_t *bams;
|
||||||
|
#else
|
||||||
char *name, *comment, *seq, *qual, *sam;
|
char *name, *comment, *seq, *qual, *sam;
|
||||||
|
#endif
|
||||||
} bseq1_t;
|
} bseq1_t;
|
||||||
|
|
||||||
|
// This is here to faciliate passing around HTSLIB's bam_hdr_t structure when we are not compiling with HTSLIB
|
||||||
|
#ifndef USE_HTSLIB
|
||||||
|
typedef struct {
|
||||||
|
void *ptr;
|
||||||
|
} bam_hdr_t; // DO NOT USE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
extern int bwa_verbose;
|
extern int bwa_verbose;
|
||||||
extern char bwa_rg_id[256];
|
extern char bwa_rg_id[256];
|
||||||
|
|
||||||
|
|
@ -41,8 +64,17 @@ extern "C" {
|
||||||
void bwa_idx_destroy(bwaidx_t *idx);
|
void bwa_idx_destroy(bwaidx_t *idx);
|
||||||
|
|
||||||
void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line);
|
void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line);
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
void bwa_format_sam_hdr(const bntseq_t *bns, const char *rg_line, kstring_t *str);
|
||||||
|
#endif
|
||||||
char *bwa_set_rg(const char *s);
|
char *bwa_set_rg(const char *s);
|
||||||
|
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
bams_t *bams_init();
|
||||||
|
void bams_add(bams_t *bams, bam1_t *b);
|
||||||
|
void bams_destroy(bams_t *bams);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
42
bwamem.c
42
bwamem.c
|
|
@ -15,6 +15,10 @@
|
||||||
#include "ksort.h"
|
#include "ksort.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
#include <htslib/sam.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef USE_MALLOC_WRAPPERS
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
# include "malloc_wrap.h"
|
# include "malloc_wrap.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -753,7 +757,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar)
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all)
|
void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all, bam_hdr_t *h)
|
||||||
{
|
{
|
||||||
int i, l_name;
|
int i, l_name;
|
||||||
mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
|
mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
|
||||||
|
|
@ -870,6 +874,15 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
||||||
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
|
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
|
||||||
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
|
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
|
||||||
kputc('\n', str);
|
kputc('\n', str);
|
||||||
|
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
bam1_t *b = bam_init1();
|
||||||
|
if (sam_parse1(str, h, b) < 0) {
|
||||||
|
// TODO: error!
|
||||||
|
}
|
||||||
|
bams_add(s->bams, b);
|
||||||
|
str->l = 0; str->s = 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************
|
/************************
|
||||||
|
|
@ -903,7 +916,7 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible
|
// TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible
|
||||||
void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
|
void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m, bam_hdr_t *h)
|
||||||
{
|
{
|
||||||
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
|
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
|
||||||
kstring_t str;
|
kstring_t str;
|
||||||
|
|
@ -935,14 +948,16 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
|
||||||
mem_aln_t t;
|
mem_aln_t t;
|
||||||
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
|
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
|
||||||
t.flag |= extra_flag;
|
t.flag |= extra_flag;
|
||||||
mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP);
|
mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP, h);
|
||||||
} else {
|
} else {
|
||||||
for (k = 0; k < aa.n; ++k)
|
for (k = 0; k < aa.n; ++k)
|
||||||
mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP);
|
mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP, h);
|
||||||
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
|
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
|
||||||
free(aa.a);
|
free(aa.a);
|
||||||
}
|
}
|
||||||
|
#ifndef USE_HTSLIB
|
||||||
s->sam = str.s;
|
s->sam = str.s;
|
||||||
|
#endif
|
||||||
if (XA) {
|
if (XA) {
|
||||||
for (k = 0; k < a->n; ++k) free(XA[k]);
|
for (k = 0; k < a->n; ++k) free(XA[k]);
|
||||||
free(XA);
|
free(XA);
|
||||||
|
|
@ -1065,6 +1080,7 @@ typedef struct {
|
||||||
bseq1_t *seqs;
|
bseq1_t *seqs;
|
||||||
mem_alnreg_v *regs;
|
mem_alnreg_v *regs;
|
||||||
int64_t n_processed;
|
int64_t n_processed;
|
||||||
|
bam_hdr_t *h;
|
||||||
} worker_t;
|
} worker_t;
|
||||||
|
|
||||||
static void worker1(void *data, int i, int tid)
|
static void worker1(void *data, int i, int tid)
|
||||||
|
|
@ -1083,26 +1099,33 @@ static void worker1(void *data, int i, int tid)
|
||||||
|
|
||||||
static void worker2(void *data, int i, int tid)
|
static void worker2(void *data, int i, int tid)
|
||||||
{
|
{
|
||||||
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
|
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], bam_hdr_t *h);
|
||||||
extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a);
|
extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a);
|
||||||
worker_t *w = (worker_t*)data;
|
worker_t *w = (worker_t*)data;
|
||||||
if (!(w->opt->flag&MEM_F_PE)) {
|
if (!(w->opt->flag&MEM_F_PE)) {
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
w->seqs[i].bams = bams_init();
|
||||||
|
#endif
|
||||||
if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name);
|
if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name);
|
||||||
if (w->opt->flag & MEM_F_ALN_REG) {
|
if (w->opt->flag & MEM_F_ALN_REG) {
|
||||||
mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
|
mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
|
||||||
} else {
|
} else {
|
||||||
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
|
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
|
||||||
mem_reg2sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
|
mem_reg2sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0, w->h);
|
||||||
}
|
}
|
||||||
free(w->regs[i].a);
|
free(w->regs[i].a);
|
||||||
} else {
|
} else {
|
||||||
if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name);
|
if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name);
|
||||||
mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1]);
|
#ifdef USE_HTSLIB
|
||||||
|
w->seqs[i<<1].bams = bams_init();
|
||||||
|
w->seqs[1+(i<<1)].bams = bams_init();
|
||||||
|
#endif
|
||||||
|
mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1], w->h);
|
||||||
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
|
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
|
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, bam_hdr_t *h)
|
||||||
{
|
{
|
||||||
extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n);
|
extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n);
|
||||||
worker_t w;
|
worker_t w;
|
||||||
|
|
@ -1114,12 +1137,13 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
|
||||||
ctime = cputime(); rtime = realtime();
|
ctime = cputime(); rtime = realtime();
|
||||||
global_bns = bns;
|
global_bns = bns;
|
||||||
regs = malloc(n * sizeof(mem_alnreg_v));
|
regs = malloc(n * sizeof(mem_alnreg_v));
|
||||||
w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac;
|
w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac;
|
||||||
w.seqs = seqs; w.regs = regs; w.n_processed = n_processed;
|
w.seqs = seqs; w.regs = regs; w.n_processed = n_processed;
|
||||||
w.pes = &pes[0];
|
w.pes = &pes[0];
|
||||||
w.aux = malloc(opt->n_threads * sizeof(smem_aux_t));
|
w.aux = malloc(opt->n_threads * sizeof(smem_aux_t));
|
||||||
for (i = 0; i < opt->n_threads; ++i)
|
for (i = 0; i < opt->n_threads; ++i)
|
||||||
w.aux[i] = smem_aux_init();
|
w.aux[i] = smem_aux_init();
|
||||||
|
w.h = h;
|
||||||
kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
|
kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
|
||||||
for (i = 0; i < opt->n_threads; ++i)
|
for (i = 0; i < opt->n_threads; ++i)
|
||||||
smem_aux_destroy(w.aux[i]);
|
smem_aux_destroy(w.aux[i]);
|
||||||
|
|
|
||||||
6
bwamem.h
6
bwamem.h
|
|
@ -50,6 +50,9 @@ typedef struct {
|
||||||
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
||||||
int max_hits; // if there are max_hits or fewer, output them all
|
int max_hits; // if there are max_hits or fewer, output them all
|
||||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
int bam_output;
|
||||||
|
#endif
|
||||||
} mem_opt_t;
|
} mem_opt_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -123,8 +126,9 @@ extern "C" {
|
||||||
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
|
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
|
||||||
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
|
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
|
||||||
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
|
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
|
||||||
|
* @param h the BAM header, NULL if not using HTSLIB
|
||||||
*/
|
*/
|
||||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
|
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, bam_hdr_t *h);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the aligned regions for one query sequence
|
* Find the aligned regions for one query sequence
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,11 @@ mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *
|
||||||
return ar;
|
return ar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, bam_hdr_t *h)
|
||||||
|
#else
|
||||||
void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
|
void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
kstring_t str = {0,0,0};
|
kstring_t str = {0,0,0};
|
||||||
|
|
@ -102,7 +106,15 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
|
||||||
ksprintf(&str, "%.3f", (double)p->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb));
|
ksprintf(&str, "%.3f", (double)p->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb));
|
||||||
kputc('\n', &str);
|
kputc('\n', &str);
|
||||||
}
|
}
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
bam1_t *b = bam_init1();
|
||||||
|
if (sam_parse1(&str, h, b) < 0) {
|
||||||
|
// TODO: error!
|
||||||
|
}
|
||||||
|
bams_add(s->bams, b);
|
||||||
|
#else
|
||||||
s->sam = str.s;
|
s->sam = str.s;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Okay, returning strings is bad, but this has happened a lot elsewhere. If I have time, I need serious code cleanup.
|
// Okay, returning strings is bad, but this has happened a lot elsewhere. If I have time, I need serious code cleanup.
|
||||||
|
|
|
||||||
|
|
@ -242,12 +242,12 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons
|
||||||
|
|
||||||
#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))
|
#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))
|
||||||
|
|
||||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], bam_hdr_t *header)
|
||||||
{
|
{
|
||||||
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
|
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
|
||||||
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
||||||
extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
|
extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m, bam_hdr_t *h);
|
||||||
extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all);
|
extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all, bam_hdr_t *h);
|
||||||
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);
|
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);
|
||||||
|
|
||||||
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
|
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
|
||||||
|
|
@ -327,8 +327,14 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
||||||
// write SAM
|
// write SAM
|
||||||
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
|
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
|
||||||
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
|
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
|
||||||
mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP); s[0].sam = strdup(str.s); str.l = 0;
|
mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP, header);
|
||||||
mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP); s[1].sam = str.s;
|
#ifndef USE_HTSLIB
|
||||||
|
s[0].sam = strdup(str.s); str.l = 0; /// not using HTSLIB
|
||||||
|
#endif
|
||||||
|
mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP, header);
|
||||||
|
#ifndef USE_HTSLIB
|
||||||
|
s[1].sam = str.s; // not using HTSLIB
|
||||||
|
#endif
|
||||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||||
free(h[0].cigar); // h[0].XA will be freed in the following block
|
free(h[0].cigar); // h[0].XA will be freed in the following block
|
||||||
free(h[1].cigar);
|
free(h[1].cigar);
|
||||||
|
|
@ -354,8 +360,8 @@ no_pairing:
|
||||||
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
|
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
|
||||||
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2;
|
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2;
|
||||||
}
|
}
|
||||||
mem_reg2sam_se(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]);
|
mem_reg2sam_se(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1], header);
|
||||||
mem_reg2sam_se(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]);
|
mem_reg2sam_se(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0], header);
|
||||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||||
free(h[0].cigar); free(h[1].cigar);
|
free(h[0].cigar); free(h[1].cigar);
|
||||||
return n;
|
return n;
|
||||||
|
|
|
||||||
44
bwaseqio.c
44
bwaseqio.c
|
|
@ -2,7 +2,11 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include "bwtaln.h"
|
#include "bwtaln.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
#include <htslib/sam.h>
|
||||||
|
#else
|
||||||
#include "bamlite.h"
|
#include "bamlite.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
@ -17,7 +21,12 @@ static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4
|
||||||
struct __bwa_seqio_t {
|
struct __bwa_seqio_t {
|
||||||
// for BAM input
|
// for BAM input
|
||||||
int is_bam, which; // 1st bit: read1, 2nd bit: read2, 3rd: SE
|
int is_bam, which; // 1st bit: read1, 2nd bit: read2, 3rd: SE
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
samFile *fp;
|
||||||
|
bam_hdr_t *h;
|
||||||
|
#else
|
||||||
bamFile fp;
|
bamFile fp;
|
||||||
|
#endif
|
||||||
// for fastq input
|
// for fastq input
|
||||||
kseq_t *ks;
|
kseq_t *ks;
|
||||||
};
|
};
|
||||||
|
|
@ -25,14 +34,24 @@ struct __bwa_seqio_t {
|
||||||
bwa_seqio_t *bwa_bam_open(const char *fn, int which)
|
bwa_seqio_t *bwa_bam_open(const char *fn, int which)
|
||||||
{
|
{
|
||||||
bwa_seqio_t *bs;
|
bwa_seqio_t *bs;
|
||||||
|
#ifndef USE_HTSLIB
|
||||||
bam_header_t *h;
|
bam_header_t *h;
|
||||||
|
#endif
|
||||||
bs = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t));
|
bs = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t));
|
||||||
bs->is_bam = 1;
|
bs->is_bam = 1;
|
||||||
bs->which = which;
|
bs->which = which;
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
bs->fp = sam_open(fn, "rb");
|
||||||
|
#else
|
||||||
bs->fp = bam_open(fn, "r");
|
bs->fp = bam_open(fn, "r");
|
||||||
|
#endif
|
||||||
if (0 == bs->fp) err_fatal_simple("Couldn't open bam file");
|
if (0 == bs->fp) err_fatal_simple("Couldn't open bam file");
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
bs->h = sam_hdr_read(bs->fp);
|
||||||
|
#else
|
||||||
h = bam_header_read(bs->fp);
|
h = bam_header_read(bs->fp);
|
||||||
bam_header_destroy(h);
|
bam_header_destroy(h);
|
||||||
|
#endif
|
||||||
return bs;
|
return bs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -50,7 +69,12 @@ void bwa_seq_close(bwa_seqio_t *bs)
|
||||||
{
|
{
|
||||||
if (bs == 0) return;
|
if (bs == 0) return;
|
||||||
if (bs->is_bam) {
|
if (bs->is_bam) {
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
if (0 != sam_close(bs->fp)) err_fatal_simple("Error closing sam/bam file");
|
||||||
|
bam_hdr_destroy(bs->h);
|
||||||
|
#else
|
||||||
if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file");
|
if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file");
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
err_gzclose(bs->ks->f->f);
|
err_gzclose(bs->ks->f->f);
|
||||||
kseq_destroy(bs->ks);
|
kseq_destroy(bs->ks);
|
||||||
|
|
@ -101,7 +125,11 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
|
||||||
b = bam_init1();
|
b = bam_init1();
|
||||||
n_seqs = 0;
|
n_seqs = 0;
|
||||||
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
|
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
while ((res = sam_read1(bs->fp, bs->h, b)) >= 0) {
|
||||||
|
#else
|
||||||
while ((res = bam_read1(bs->fp, b)) >= 0) {
|
while ((res = bam_read1(bs->fp, b)) >= 0) {
|
||||||
|
#endif
|
||||||
uint8_t *s, *q;
|
uint8_t *s, *q;
|
||||||
int go = 0;
|
int go = 0;
|
||||||
if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
|
if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
|
||||||
|
|
@ -114,14 +142,26 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
|
||||||
p->qual = 0;
|
p->qual = 0;
|
||||||
p->full_len = p->clip_len = p->len = l;
|
p->full_len = p->clip_len = p->len = l;
|
||||||
n_tot += p->full_len;
|
n_tot += p->full_len;
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
s = bam_get_seq(b); q = bam_get_qual(b);
|
||||||
|
#else
|
||||||
s = bam1_seq(b); q = bam1_qual(b);
|
s = bam1_seq(b); q = bam1_qual(b);
|
||||||
|
#endif
|
||||||
p->seq = (ubyte_t*)calloc(p->len + 1, 1);
|
p->seq = (ubyte_t*)calloc(p->len + 1, 1);
|
||||||
p->qual = (ubyte_t*)calloc(p->len + 1, 1);
|
p->qual = (ubyte_t*)calloc(p->len + 1, 1);
|
||||||
for (i = 0; i != p->full_len; ++i) {
|
for (i = 0; i != p->full_len; ++i) {
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
p->seq[i] = bam_nt16_nt4_table[(int)bam_seqi(s, i)];
|
||||||
|
#else
|
||||||
p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)];
|
p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)];
|
||||||
|
#endif
|
||||||
p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126;
|
p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126;
|
||||||
}
|
}
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
if (bam_is_rev(b)) { // then reverse
|
||||||
|
#else
|
||||||
if (bam1_strand(b)) { // then reverse
|
if (bam1_strand(b)) { // then reverse
|
||||||
|
#endif
|
||||||
seq_reverse(p->len, p->seq, 1);
|
seq_reverse(p->len, p->seq, 1);
|
||||||
seq_reverse(p->len, p->qual, 0);
|
seq_reverse(p->len, p->qual, 0);
|
||||||
}
|
}
|
||||||
|
|
@ -130,7 +170,11 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
|
||||||
memcpy(p->rseq, p->seq, p->len);
|
memcpy(p->rseq, p->seq, p->len);
|
||||||
seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
|
seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
|
||||||
seq_reverse(p->len, p->rseq, is_comp);
|
seq_reverse(p->len, p->rseq, is_comp);
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
p->name = strdup((const char*)bam_get_qname(b));
|
||||||
|
#else
|
||||||
p->name = strdup((const char*)bam1_qname(b));
|
p->name = strdup((const char*)bam1_qname(b));
|
||||||
|
#endif
|
||||||
if (n_seqs == n_needed) break;
|
if (n_seqs == n_needed) break;
|
||||||
}
|
}
|
||||||
if (res < 0 && res != -1) err_fatal_simple("Error reading bam file");
|
if (res < 0 && res != -1) err_fatal_simple("Error reading bam file");
|
||||||
|
|
|
||||||
59
fastmap.c
59
fastmap.c
|
|
@ -53,7 +53,11 @@ int main_mem(int argc, char *argv[])
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
memset(&opt0, 0, sizeof(mem_opt_t));
|
memset(&opt0, 0, sizeof(mem_opt_t));
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:o:")) >= 0) {
|
||||||
|
#else
|
||||||
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {
|
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {
|
||||||
|
#endif
|
||||||
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
||||||
else if (c == 'x') mode = optarg;
|
else if (c == 'x') mode = optarg;
|
||||||
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
||||||
|
|
@ -120,7 +124,13 @@ int main_mem(int argc, char *argv[])
|
||||||
if (bwa_verbose >= 3)
|
if (bwa_verbose >= 3)
|
||||||
fprintf(stderr, "[M::%s] mean insert size: %.3f, stddev: %.3f, max: %d, min: %d\n",
|
fprintf(stderr, "[M::%s] mean insert size: %.3f, stddev: %.3f, max: %d, min: %d\n",
|
||||||
__func__, pes[1].avg, pes[1].std, pes[1].high, pes[1].low);
|
__func__, pes[1].avg, pes[1].std, pes[1].high, pes[1].low);
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
} else if (c == 'o') {
|
||||||
|
opt->bam_output = atoi(optarg);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
}
|
||||||
|
#endif
|
||||||
else return 1;
|
else return 1;
|
||||||
}
|
}
|
||||||
if (opt->n_threads < 1) opt->n_threads = 1;
|
if (opt->n_threads < 1) opt->n_threads = 1;
|
||||||
|
|
@ -165,6 +175,9 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, " specify the mean, standard deviation (10%% of the mean if absent), max\n");
|
fprintf(stderr, " specify the mean, standard deviation (10%% of the mean if absent), max\n");
|
||||||
fprintf(stderr, " (4 sigma from the mean if absent) and min of the insert size distribution.\n");
|
fprintf(stderr, " (4 sigma from the mean if absent) and min of the insert size distribution.\n");
|
||||||
fprintf(stderr, " FR orientation only. [inferred]\n");
|
fprintf(stderr, " FR orientation only. [inferred]\n");
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
fprintf(stderr, " -o INT 0 - BAM (compressed), 1 - BAM (uncompressed), 2 - SAM [%d]\n", opt->bam_output);
|
||||||
|
#endif
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "Note: Please read the man page for detailed description of the command line and options.\n");
|
fprintf(stderr, "Note: Please read the man page for detailed description of the command line and options.\n");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
|
@ -226,8 +239,33 @@ int main_mem(int argc, char *argv[])
|
||||||
opt->flag |= MEM_F_PE;
|
opt->flag |= MEM_F_PE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!(opt->flag & MEM_F_ALN_REG))
|
bam_hdr_t *h = NULL; // TODO
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
samFile *out = NULL;
|
||||||
|
char *modes[] = {"wb", "wbu", "w"};
|
||||||
|
switch (opt->bam_output) {
|
||||||
|
case 0: // BAM compressed
|
||||||
|
case 1: // BAM uncompressed
|
||||||
|
case 2: // SAM
|
||||||
|
out = sam_open("-", modes[opt->bam_output]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "Error: output format was out of range [%d]\n", opt->bam_output);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (!(opt->flag & MEM_F_ALN_REG)) {
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
kstring_t str;
|
||||||
|
str.l = str.m = 0; str.s = 0;
|
||||||
|
bwa_format_sam_hdr(idx->bns, rg_line, &str);
|
||||||
|
h = sam_hdr_parse(str.l, str.s);
|
||||||
|
h->l_text = str.l; h->text = str.s;
|
||||||
|
sam_hdr_write(out, h);
|
||||||
|
#else
|
||||||
bwa_print_sam_hdr(idx->bns, rg_line);
|
bwa_print_sam_hdr(idx->bns, rg_line);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
|
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
|
||||||
int64_t size = 0;
|
int64_t size = 0;
|
||||||
if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
|
if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
|
||||||
|
|
@ -242,11 +280,22 @@ int main_mem(int argc, char *argv[])
|
||||||
for (i = 0; i < n; ++i) size += seqs[i].l_seq;
|
for (i = 0; i < n; ++i) size += seqs[i].l_seq;
|
||||||
if (bwa_verbose >= 3)
|
if (bwa_verbose >= 3)
|
||||||
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
|
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
|
||||||
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n_processed, n, seqs, pes0);
|
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n_processed, n, seqs, pes0, h);
|
||||||
n_processed += n;
|
n_processed += n;
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
if (seqs[i].bams) {
|
||||||
|
int j;
|
||||||
|
for (j = 0; j < seqs[i].bams->l; j++) {
|
||||||
|
sam_write1(out, h, seqs[i].bams->bams[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bams_destroy(seqs[i].bams); seqs[i].bams = NULL;
|
||||||
|
#else
|
||||||
if (seqs[i].sam) err_fputs(seqs[i].sam, stdout);
|
if (seqs[i].sam) err_fputs(seqs[i].sam, stdout);
|
||||||
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
free(seqs[i].sam);
|
||||||
|
#endif
|
||||||
|
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual);
|
||||||
}
|
}
|
||||||
free(seqs);
|
free(seqs);
|
||||||
}
|
}
|
||||||
|
|
@ -255,6 +304,10 @@ int main_mem(int argc, char *argv[])
|
||||||
bwa_idx_destroy(idx);
|
bwa_idx_destroy(idx);
|
||||||
kseq_destroy(ks);
|
kseq_destroy(ks);
|
||||||
err_gzclose(fp); kclose(ko);
|
err_gzclose(fp); kclose(ko);
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
sam_close(out);
|
||||||
|
bam_hdr_destroy(h);
|
||||||
|
#endif
|
||||||
if (ks2) {
|
if (ks2) {
|
||||||
kseq_destroy(ks2);
|
kseq_destroy(ks2);
|
||||||
err_gzclose(fp2); kclose(ko2);
|
err_gzclose(fp2); kclose(ko2);
|
||||||
|
|
|
||||||
7
main.c
7
main.c
|
|
@ -86,8 +86,15 @@ int main(int argc, char *argv[])
|
||||||
fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
|
fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
#ifdef USE_HTSLIB
|
||||||
|
if (strcmp(argv[1], "mem") != 0) {
|
||||||
|
err_fflush(stdout);
|
||||||
|
err_fclose(stdout);
|
||||||
|
}
|
||||||
|
#else
|
||||||
err_fflush(stdout);
|
err_fflush(stdout);
|
||||||
err_fclose(stdout);
|
err_fclose(stdout);
|
||||||
|
#endif
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
fprintf(stderr, "[%s] Version: %s\n", __func__, PACKAGE_VERSION);
|
fprintf(stderr, "[%s] Version: %s\n", __func__, PACKAGE_VERSION);
|
||||||
fprintf(stderr, "[%s] CMD:", __func__);
|
fprintf(stderr, "[%s] CMD:", __func__);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue