merge bseq.{h,c} to utils.{h,c}
I do not like many small files.
This commit is contained in:
parent
6ad5a3c086
commit
95d18449b3
2
Makefile
2
Makefile
|
|
@ -4,7 +4,7 @@ CXXFLAGS= $(CFLAGS)
|
||||||
AR= ar
|
AR= ar
|
||||||
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
|
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
|
||||||
LOBJS= bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \
|
LOBJS= bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \
|
||||||
bseq.o bwaseqio.o bwase.o kstring.o
|
bwaseqio.o bwase.o kstring.o
|
||||||
AOBJS= QSufSort.o bwt_gen.o \
|
AOBJS= QSufSort.o bwt_gen.o \
|
||||||
is.o bwtmisc.o bwtindex.o ksw.o bwape.o \
|
is.o bwtmisc.o bwtindex.o ksw.o bwape.o \
|
||||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||||
|
|
|
||||||
55
bseq.c
55
bseq.c
|
|
@ -1,55 +0,0 @@
|
||||||
#include <zlib.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "bseq.h"
|
|
||||||
#include "kseq.h"
|
|
||||||
KSEQ_INIT2(, gzFile, gzread)
|
|
||||||
|
|
||||||
static inline void trim_readno(kstring_t *s)
|
|
||||||
{
|
|
||||||
if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1]))
|
|
||||||
s->l -= 2, s->s[s->l] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
|
|
||||||
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
|
|
||||||
s->name = strdup(ks->name.s);
|
|
||||||
s->comment = ks->comment.l? strdup(s->comment) : 0;
|
|
||||||
s->seq = strdup(ks->seq.s);
|
|
||||||
s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
|
|
||||||
s->l_seq = strlen(s->seq);
|
|
||||||
}
|
|
||||||
|
|
||||||
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
|
|
||||||
{
|
|
||||||
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
|
|
||||||
int size = 0, m, n;
|
|
||||||
bseq1_t *seqs;
|
|
||||||
m = n = 0; seqs = 0;
|
|
||||||
while (kseq_read(ks) >= 0) {
|
|
||||||
if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
|
|
||||||
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (n >= m) {
|
|
||||||
m = m? m<<1 : 256;
|
|
||||||
seqs = realloc(seqs, m * sizeof(bseq1_t));
|
|
||||||
}
|
|
||||||
trim_readno(&ks->name);
|
|
||||||
kseq2bseq1(ks, &seqs[n]);
|
|
||||||
size += seqs[n++].l_seq;
|
|
||||||
if (ks2) {
|
|
||||||
trim_readno(&ks2->name);
|
|
||||||
kseq2bseq1(ks2, &seqs[n]);
|
|
||||||
size += seqs[n++].l_seq;
|
|
||||||
}
|
|
||||||
if (size >= chunk_size) break;
|
|
||||||
}
|
|
||||||
if (size == 0) { // test if the 2nd file is finished
|
|
||||||
if (ks2 && kseq_read(ks2) >= 0)
|
|
||||||
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
|
||||||
}
|
|
||||||
*n_ = n;
|
|
||||||
return seqs;
|
|
||||||
}
|
|
||||||
11
bseq.h
11
bseq.h
|
|
@ -1,11 +0,0 @@
|
||||||
#ifndef BATCHSEQ_H_
|
|
||||||
#define BATCHSEQ_H_
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int l_seq;
|
|
||||||
char *name, *comment, *seq, *qual, *sam;
|
|
||||||
} bseq1_t;
|
|
||||||
|
|
||||||
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
1
bwamem.c
1
bwamem.c
|
|
@ -10,6 +10,7 @@
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
|
#include "kvec.h"
|
||||||
#include "ksort.h"
|
#include "ksort.h"
|
||||||
|
|
||||||
#define MAPQ_COEF 40.
|
#define MAPQ_COEF 40.
|
||||||
|
|
|
||||||
7
bwamem.h
7
bwamem.h
|
|
@ -3,8 +3,7 @@
|
||||||
|
|
||||||
#include "bwt.h"
|
#include "bwt.h"
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
#include "bseq.h"
|
#include "utils.h"
|
||||||
#include "kvec.h"
|
|
||||||
|
|
||||||
struct __smem_i;
|
struct __smem_i;
|
||||||
typedef struct __smem_i smem_i;
|
typedef struct __smem_i smem_i;
|
||||||
|
|
@ -51,8 +50,8 @@ typedef struct {
|
||||||
int64_t mb, me; // mb: mate start; -1 if single-end; -2 if mate unmapped
|
int64_t mb, me; // mb: mate start; -1 if single-end; -2 if mate unmapped
|
||||||
} bwahit_t;
|
} bwahit_t;
|
||||||
|
|
||||||
typedef kvec_t(mem_chain_t) mem_chain_v;
|
typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v;
|
||||||
typedef kvec_t(mem_alnreg_t) mem_alnreg_v;
|
typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;
|
||||||
|
|
||||||
extern int mem_verbose;
|
extern int mem_verbose;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@
|
||||||
#include "bwtsw2.h"
|
#include "bwtsw2.h"
|
||||||
#include "stdaln.h"
|
#include "stdaln.h"
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
#include "bseq.h"
|
|
||||||
|
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
#include "bwt.h"
|
#include "bwt.h"
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
#include "bseq.h"
|
#include "utils.h"
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
||||||
|
|
|
||||||
62
utils.c
62
utils.c
|
|
@ -35,9 +35,8 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y))
|
|
||||||
|
|
||||||
#include "ksort.h"
|
#include "ksort.h"
|
||||||
|
#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y))
|
||||||
KSORT_INIT(128, pair64_t, pair64_lt)
|
KSORT_INIT(128, pair64_t, pair64_lt)
|
||||||
KSORT_INIT(64, uint64_t, ks_lt_generic)
|
KSORT_INIT(64, uint64_t, ks_lt_generic)
|
||||||
|
|
||||||
|
|
@ -139,6 +138,10 @@ int err_fclose(FILE *stream)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*********
|
||||||
|
* Timer *
|
||||||
|
*********/
|
||||||
|
|
||||||
double cputime()
|
double cputime()
|
||||||
{
|
{
|
||||||
struct rusage r;
|
struct rusage r;
|
||||||
|
|
@ -153,3 +156,58 @@ double realtime()
|
||||||
gettimeofday(&tp, &tzp);
|
gettimeofday(&tp, &tzp);
|
||||||
return tp.tv_sec + tp.tv_usec * 1e-6;
|
return tp.tv_sec + tp.tv_usec * 1e-6;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/************************
|
||||||
|
* Batch FASTA/Q reader *
|
||||||
|
************************/
|
||||||
|
|
||||||
|
#include "kseq.h"
|
||||||
|
KSEQ_INIT2(, gzFile, gzread)
|
||||||
|
|
||||||
|
/**
 * Strip a trailing "/<digit>" read-number suffix (e.g. "/1" or "/2") from a
 * sequence name, in place.
 *
 * The name is shortened only when it is longer than two characters, so a name
 * that consists solely of the suffix is left unchanged.
 */
static inline void trim_readno(kstring_t *s)
{
	// isdigit() has undefined behaviour for negative values other than EOF,
	// so the (possibly signed) char is converted to unsigned char first.
	if (s->l > 2 && s->s[s->l-2] == '/' && isdigit((unsigned char)s->s[s->l-1])) {
		s->l -= 2;
		s->s[s->l] = 0;
	}
}
|
||||||
|
|
||||||
|
/**
 * Copy one parsed kseq record into a batch record.
 *
 * name and seq are always duplicated with strdup(); comment and qual are
 * duplicated only when the source field is non-empty and are set to NULL
 * otherwise. l_seq is set to the length of the copied sequence string.
 * The `sam` field of *s is not written by this function.
 */
static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
	s->name = strdup(ks->name.s);
	// Duplicate the SOURCE comment. The destination record comes straight from
	// realloc() in bseq_read(), so reading s->comment here would pass an
	// uninitialised pointer to strdup().
	s->comment = ks->comment.l? strdup(ks->comment.s) : 0;
	s->seq = strdup(ks->seq.s);
	s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
	s->l_seq = strlen(s->seq);
}
|
||||||
|
|
||||||
|
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
|
||||||
|
{
|
||||||
|
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
|
||||||
|
int size = 0, m, n;
|
||||||
|
bseq1_t *seqs;
|
||||||
|
m = n = 0; seqs = 0;
|
||||||
|
while (kseq_read(ks) >= 0) {
|
||||||
|
if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
|
||||||
|
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (n >= m) {
|
||||||
|
m = m? m<<1 : 256;
|
||||||
|
seqs = realloc(seqs, m * sizeof(bseq1_t));
|
||||||
|
}
|
||||||
|
trim_readno(&ks->name);
|
||||||
|
kseq2bseq1(ks, &seqs[n]);
|
||||||
|
size += seqs[n++].l_seq;
|
||||||
|
if (ks2) {
|
||||||
|
trim_readno(&ks2->name);
|
||||||
|
kseq2bseq1(ks2, &seqs[n]);
|
||||||
|
size += seqs[n++].l_seq;
|
||||||
|
}
|
||||||
|
if (size >= chunk_size) break;
|
||||||
|
}
|
||||||
|
if (size == 0) { // test if the 2nd file is finished
|
||||||
|
if (ks2 && kseq_read(ks2) >= 0)
|
||||||
|
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
||||||
|
}
|
||||||
|
*n_ = n;
|
||||||
|
return seqs;
|
||||||
|
}
|
||||||
|
|
|
||||||
7
utils.h
7
utils.h
|
|
@ -52,6 +52,11 @@ typedef struct {
|
||||||
typedef struct { size_t n, m; uint64_t *a; } uint64_v;
|
typedef struct { size_t n, m; uint64_t *a; } uint64_v;
|
||||||
typedef struct { size_t n, m; pair64_t *a; } pair64_v;
|
typedef struct { size_t n, m; pair64_t *a; } pair64_v;
|
||||||
|
|
||||||
|
/* One record of a sequence batch, as returned in the array from bseq_read(). */
typedef struct {
	int l_seq;      /* length of seq */
	char *name;     /* sequence name */
	char *comment;  /* comment text; may be NULL */
	char *seq;      /* sequence string */
	char *qual;     /* quality string; may be NULL */
	char *sam;      /* NOTE(review): presumably the SAM output text for this record; confirm against users */
} bseq1_t;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -75,6 +80,8 @@ extern "C" {
|
||||||
void ks_introsort_64 (size_t n, uint64_t *a);
|
void ks_introsort_64 (size_t n, uint64_t *a);
|
||||||
void ks_introsort_128(size_t n, pair64_t *a);
|
void ks_introsort_128(size_t n, pair64_t *a);
|
||||||
|
|
||||||
|
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue