将fputs改成fwrite,写入效率提升30% (Replace fputs with fwrite; write efficiency improved by 30%)

This commit is contained in:
zzh 2024-04-06 20:53:42 +08:00
parent d41c038616
commit d1a5715e56
4 changed files with 49 additions and 30 deletions

4
.vscode/launch.json vendored
View File

@ -18,8 +18,8 @@
"-R", "-R",
"'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'", "'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'",
"~/reference/bwa/human_g1k_v37_decoy.fasta", "~/reference/bwa/human_g1k_v37_decoy.fasta",
"~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq", "~/data/fastq/dataset/na12878_wes_144/s_1.fq",
"~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq", "~/data/fastq/dataset/na12878_wes_144/s_2.fq",
"-o", "-o",
"/dev/null" "/dev/null"
], ],

View File

@ -121,6 +121,8 @@ typedef struct {
mem_worker_t *w; mem_worker_t *w;
int data_idx; // pingpong buffer index int data_idx; // pingpong buffer index
ktp_data_t *data; ktp_data_t *data;
int wbuf_size;
char *wbuf;
volatile int read_complete; volatile int read_complete;
volatile int calc_complete; volatile int calc_complete;
long read_idx; long read_idx;
@ -248,12 +250,40 @@ static inline void *write_data(ktp_aux_t *aux, ktp_data_t *data)
#endif #endif
#endif #endif
//int64_t ms = 0; //int64_t ms = 0;
int buf_written = 0;
for (i = 0; i < data->n_sams; ++i) for (i = 0; i < data->n_sams; ++i)
{ {
//ms += data->sams[i].sam.m; const int slen = data->sams[i].sam.l;
if (data->sams[i].sam.l) if (slen && (buf_written + slen) < aux->wbuf_size)
err_fputs(data->sams[i].sam.s, stdout); {
memcpy(&aux->wbuf[buf_written], data->sams[i].sam.s, slen);
buf_written += slen;
}
else if (buf_written > 0)
{
err_fwrite(aux->wbuf, 1, buf_written, stdout);
if ((buf_written + slen) >= aux->wbuf_size)
{
memcpy(&aux->wbuf[0], data->sams[i].sam.s, slen);
buf_written = slen;
}
else
{
buf_written = 0;
}
}
} }
if (buf_written > 0) {
err_fwrite(aux->wbuf, 1, buf_written, stdout);
}
//for (i = 0; i < data->n_sams; ++i)
//{
// //ms += data->sams[i].sam.m;
// if (data->sams[i].sam.l)
// err_fputs(data->sams[i].sam.s, stdout);
//}
//fprintf(stderr, "sam size: %ld M\n", ms / 1024 / 1024); //fprintf(stderr, "sam size: %ld M\n", ms / 1024 / 1024);
#ifdef SHOW_PERF #ifdef SHOW_PERF
@ -723,6 +753,10 @@ int main_mem(int argc, char *argv[])
bwa_print_sam_hdr(aux.idx->bns, hdr_line); bwa_print_sam_hdr(aux.idx->bns, hdr_line);
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads; aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
// allocate write buffer
aux.wbuf_size = 16777216;
aux.wbuf = malloc(aux.wbuf_size);
#ifdef SHOW_PERF #ifdef SHOW_PERF
#if USE_RDTSC #if USE_RDTSC
tmp_time1 = __rdtsc(); tmp_time1 = __rdtsc();

8
kseq.h
View File

@ -221,10 +221,10 @@ typedef struct __kstring_t {
kstream_t *f; \ kstream_t *f; \
} kseq_t; } kseq_t;
#define KSEQ_INIT2(SCOPE, type_t, __read) \ #define KSEQ_INIT2(SCOPE, type_t, __read) \
KSTREAM_INIT(type_t, __read, 16384) \ KSTREAM_INIT(type_t, __read, 16777216) /* 16384 */ \
__KSEQ_TYPE(type_t) \ __KSEQ_TYPE(type_t) \
__KSEQ_BASIC(SCOPE, type_t) \ __KSEQ_BASIC(SCOPE, type_t) \
__KSEQ_READ(SCOPE) __KSEQ_READ(SCOPE)
#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)

27
run.sh
View File

@ -20,38 +20,23 @@ thread=64
## d4 ## d4
#n_r1=~/data/fastq/dataset/zy_wes/s_1.fq #n_r1=~/data/fastq/dataset/zy_wes/s_1.fq
#n_r2=~/data/fastq/dataset/zy_wes/s_2.fq #n_r2=~/data/fastq/dataset/zy_wes/s_2.fq
n_r1=~/data/fastq/dataset/zy_wes/45mr1.fq.gz #n_r1=~/data/fastq/dataset/zy_wes/45mr1.fq.gz
n_r2=~/data/fastq/dataset/zy_wes/45mr2.fq.gz #n_r2=~/data/fastq/dataset/zy_wes/45mr2.fq.gz
## d5 ## d5
#n_r1=~/data/fastq/dataset/zy_wgs/45mr1.fq.gz #n_r1=~/data/fastq/dataset/zy_wgs/45mr1.fq.gz
#n_r2=~/data/fastq/dataset/zy_wgs/45mr2.fq.gz #n_r2=~/data/fastq/dataset/zy_wgs/45mr2.fq.gz
#n_r1=~/data/fastq/dataset/zy_wgs/s_1.fq #n_r1=~/data/fastq/dataset/zy_wgs/s_1.fq
#n_r2=~/data/fastq/dataset/zy_wgs/s_2.fq #n_r2=~/data/fastq/dataset/zy_wgs/s_2.fq
#n_r1=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_1.fq n_r1=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_1.fq
#n_r2=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_2.fq n_r2=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_2.fq
reference=~/data1/fmt_ref/human_g1k_v37_decoy.fasta reference=~/data1/fmt_ref/human_g1k_v37_decoy.fasta
#reference=~/reference/bwa/human_g1k_v37_decoy.fasta #reference=~/reference/bwa/human_g1k_v37_decoy.fasta
#reference=~/data/reference/human_g1k_v37_decoy.fasta #reference=~/data/reference/human_g1k_v37_decoy.fasta
#out=./all.sam
#out=./sn.sam
#out=./ssn-x1.sam
out=~/data1/d4-45m.sam
#out=~/data/out1.sam
#out=/dev/null
#out=./na12878.sam
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
# /home/zzh/data/fastq/nm1.fq \
# /home/zzh/data/fastq/nm2.fq -o /dev/null
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ out=~/data1/fast-out.sam
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
# /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_1.fq.gz \
# /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_2.fq.gz \
# -o /dev/null
time ./fastbwa mem -t $thread -b 256 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ time ./fastbwa mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
$reference \ $reference \
$n_r1 \ $n_r1 \
$n_r2 \ $n_r2 \