merge bseq.{h,c} to utils.{h,c}

I do not like many small files.
2013-02-12 10:36:15 -05:00 · 2013-02-12 10:36:15 -05:00 · 95d18449b3
parent 6ad5a3c086
commit 95d18449b3
9 changed files with 73 additions and 75 deletions
--- a/2
+++ b/2
@ -4,7 +4,7 @@ CXXFLAGS=	$(CFLAGS)
 AR=			ar
 DFLAGS=		-DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
 LOBJS=		bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \
-			bseq.o bwaseqio.o bwase.o kstring.o
+			bwaseqio.o bwase.o kstring.o
 AOBJS=		QSufSort.o bwt_gen.o \
 			is.o bwtmisc.o bwtindex.o ksw.o bwape.o \
 			bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
--- a/bseq.c
+++ b/bseq.c
@ -1,55 +0,0 @@
-#include <zlib.h>
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "bseq.h"
-#include "kseq.h"
-KSEQ_INIT2(, gzFile, gzread)
-
-static inline void trim_readno(kstring_t *s)
-{
-	if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1]))
-		s->l -= 2, s->s[s->l] = 0;
-}
-
-static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
-{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
-	s->name = strdup(ks->name.s);
-	s->comment = ks->comment.l? strdup(s->comment) : 0;
-	s->seq = strdup(ks->seq.s);
-	s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
-	s->l_seq = strlen(s->seq);
-}
-
-bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
-{
-	kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
-	int size = 0, m, n;
-	bseq1_t *seqs;
-	m = n = 0; seqs = 0;
-	while (kseq_read(ks) >= 0) {
-		if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
-			fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
-			break;
-		}
-		if (n >= m) {
-			m = m? m<<1 : 256;
-			seqs = realloc(seqs, m * sizeof(bseq1_t));
-		}
-		trim_readno(&ks->name);
-		kseq2bseq1(ks, &seqs[n]);
-		size += seqs[n++].l_seq;
-		if (ks2) {
-			trim_readno(&ks2->name);
-			kseq2bseq1(ks2, &seqs[n]);
-			size += seqs[n++].l_seq;
-		}
-		if (size >= chunk_size) break;
-	}
-	if (size == 0) { // test if the 2nd file is finished
-		if (ks2 && kseq_read(ks2) >= 0)
-			fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
-	}
-	*n_ = n;
-	return seqs;
-}
--- a/bseq.h
+++ b/bseq.h
@ -1,11 +0,0 @@
-#ifndef BATCHSEQ_H_
-#define BATCHSEQ_H_
-
-typedef struct {
-	int l_seq;
-	char *name, *comment, *seq, *qual, *sam;
-} bseq1_t;
-
-bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
-
-#endif
--- a/bwamem.c
+++ b/bwamem.c
@ -10,6 +10,7 @@
 #include "bwamem.h"
 #include "bntseq.h"
 #include "ksw.h"
+#include "kvec.h"
 #include "ksort.h"

 #define MAPQ_COEF 40.
--- a/bwamem.h
+++ b/bwamem.h
@ -3,8 +3,7 @@

 #include "bwt.h"
 #include "bntseq.h"
-#include "bseq.h"
-#include "kvec.h"
+#include "utils.h"

 struct __smem_i;
 typedef struct __smem_i smem_i;
@ -51,8 +50,8 @@ typedef struct {
 	int64_t mb, me; // mb: mate start; -1 if single-end; -2 if mate unmapped
 } bwahit_t;

-typedef kvec_t(mem_chain_t)  mem_chain_v;
-typedef kvec_t(mem_alnreg_t) mem_alnreg_v;
+typedef struct { size_t n, m; mem_chain_t *a;  } mem_chain_v;
+typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v;

 extern int mem_verbose;

--- a/bwtsw2_aux.c
+++ b/bwtsw2_aux.c
@ -13,7 +13,6 @@
 #include "bwtsw2.h"
 #include "stdaln.h"
 #include "kstring.h"
-#include "bseq.h"

 #include "kseq.h"
 KSEQ_DECLARE(gzFile)
--- a/fastmap.c
+++ b/fastmap.c
@ -6,7 +6,7 @@
 #include "bwt.h"
 #include "bwamem.h"
 #include "kvec.h"
-#include "bseq.h"
+#include "utils.h"
 #include "kseq.h"
 KSEQ_DECLARE(gzFile)

--- a/utils.c
+++ b/utils.c
@ -35,9 +35,8 @@
 #include <sys/time.h>
 #include "utils.h"

-#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y))
-
 #include "ksort.h"
+#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y))
 KSORT_INIT(128, pair64_t, pair64_lt)
 KSORT_INIT(64,  uint64_t, ks_lt_generic)

@ -139,6 +138,10 @@ int err_fclose(FILE *stream)
 	return ret;
 }

+/*********
+ * Timer *
+ *********/
+
 double cputime()
 {
 	struct rusage r;
@ -153,3 +156,58 @@ double realtime()
 	gettimeofday(&tp, &tzp);
 	return tp.tv_sec + tp.tv_usec * 1e-6;
 }
+
+/************************
+ * Batch FASTA/Q reader *
+ ************************/
+
+#include "kseq.h"
+KSEQ_INIT2(, gzFile, gzread)
+
+static inline void trim_readno(kstring_t *s)
+{
+	if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1]))
+		s->l -= 2, s->s[s->l] = 0;
+}
+
+static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
+{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
+	s->name = strdup(ks->name.s);
+	s->comment = ks->comment.l? strdup(s->comment) : 0;
+	s->seq = strdup(ks->seq.s);
+	s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
+	s->l_seq = strlen(s->seq);
+}
+
+bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
+{
+	kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
+	int size = 0, m, n;
+	bseq1_t *seqs;
+	m = n = 0; seqs = 0;
+	while (kseq_read(ks) >= 0) {
+		if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
+			fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
+			break;
+		}
+		if (n >= m) {
+			m = m? m<<1 : 256;
+			seqs = realloc(seqs, m * sizeof(bseq1_t));
+		}
+		trim_readno(&ks->name);
+		kseq2bseq1(ks, &seqs[n]);
+		size += seqs[n++].l_seq;
+		if (ks2) {
+			trim_readno(&ks2->name);
+			kseq2bseq1(ks2, &seqs[n]);
+			size += seqs[n++].l_seq;
+		}
+		if (size >= chunk_size) break;
+	}
+	if (size == 0) { // test if the 2nd file is finished
+		if (ks2 && kseq_read(ks2) >= 0)
+			fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
+	}
+	*n_ = n;
+	return seqs;
+}
--- a/utils.h
+++ b/utils.h
@ -52,6 +52,11 @@ typedef struct {
 typedef struct { size_t n, m; uint64_t *a; } uint64_v;
 typedef struct { size_t n, m; pair64_t *a; } pair64_v;

+typedef struct {
+	int l_seq;
+	char *name, *comment, *seq, *qual, *sam;
+} bseq1_t;
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@ -75,6 +80,8 @@ extern "C" {
 	void ks_introsort_64 (size_t n, uint64_t *a);
 	void ks_introsort_128(size_t n, pair64_t *a);

+	bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
+
 #ifdef __cplusplus
 }
 #endif