From d1a5715e5667651b534cebb2cf6d570132ffa5b5 Mon Sep 17 00:00:00 2001 From: zzh Date: Sat, 6 Apr 2024 20:53:42 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86fputs=E6=94=B9=E6=88=90fwrite=EF=BC=8C?= =?UTF-8?q?=E5=86=99=E5=85=A5=E6=95=88=E7=8E=87=E6=8F=90=E5=8D=8730%?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/launch.json | 4 ++-- fastmap.c | 40 +++++++++++++++++++++++++++++++++++++--- kseq.h | 8 ++++---- run.sh | 27 ++++++--------------------- 4 files changed, 49 insertions(+), 30 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 7a727e3..110a3b5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -18,8 +18,8 @@ "-R", "'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'", "~/reference/bwa/human_g1k_v37_decoy.fasta", - "~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq", - "~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq", + "~/data/fastq/dataset/na12878_wes_144/s_1.fq", + "~/data/fastq/dataset/na12878_wes_144/s_2.fq", "-o", "/dev/null" ], diff --git a/fastmap.c b/fastmap.c index f2c160a..007d71a 100644 --- a/fastmap.c +++ b/fastmap.c @@ -121,6 +121,8 @@ typedef struct { mem_worker_t *w; int data_idx; // pingpong buffer index ktp_data_t *data; + int wbuf_size; + char *wbuf; volatile int read_complete; volatile int calc_complete; long read_idx; @@ -248,12 +250,40 @@ static inline void *write_data(ktp_aux_t *aux, ktp_data_t *data) #endif #endif //int64_t ms = 0; + int buf_written = 0; for (i = 0; i < data->n_sams; ++i) { - //ms += data->sams[i].sam.m; - if (data->sams[i].sam.l) - err_fputs(data->sams[i].sam.s, stdout); + const int slen = data->sams[i].sam.l; + if (slen && (buf_written + slen) < aux->wbuf_size) + { + memcpy(&aux->wbuf[buf_written], data->sams[i].sam.s, slen); + buf_written += slen; + } + else if (buf_written > 0) + { + err_fwrite(aux->wbuf, 1, buf_written, stdout); + if ((buf_written + slen) >= aux->wbuf_size) + { + memcpy(&aux->wbuf[0], data->sams[i].sam.s, slen); + buf_written = slen; + } + else + { + buf_written = 0; + } + } } + if (buf_written > 0) { + err_fwrite(aux->wbuf, 1, buf_written, stdout); + } + + //for (i = 0; i < data->n_sams; ++i) + //{ + // //ms += data->sams[i].sam.m; + // if (data->sams[i].sam.l) + // err_fputs(data->sams[i].sam.s, stdout); + //} + //fprintf(stderr, "sam size: %ld M\n", ms / 1024 / 1024); #ifdef SHOW_PERF @@ -723,6 +753,10 @@ int main_mem(int argc, char *argv[]) bwa_print_sam_hdr(aux.idx->bns, hdr_line); aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads; + // allocate write buffer + aux.wbuf_size = 16777216; + aux.wbuf = malloc(aux.wbuf_size); + #ifdef SHOW_PERF #if USE_RDTSC tmp_time1 = __rdtsc(); diff --git a/kseq.h b/kseq.h index f3862c6..2107f80 100644 --- a/kseq.h +++ b/kseq.h @@ -221,10 +221,10 @@ typedef struct __kstring_t { kstream_t *f; \ } kseq_t; -#define KSEQ_INIT2(SCOPE, type_t, __read) \ - KSTREAM_INIT(type_t, __read, 16384) \ - __KSEQ_TYPE(type_t) \ - __KSEQ_BASIC(SCOPE, type_t) \ +#define KSEQ_INIT2(SCOPE, type_t, __read) \ + KSTREAM_INIT(type_t, __read, 16777216) /* 16384 */ \ + __KSEQ_TYPE(type_t) \ + __KSEQ_BASIC(SCOPE, type_t) \ __KSEQ_READ(SCOPE) #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) diff --git a/run.sh b/run.sh index ed1408b..ab1aeb8 100755 --- a/run.sh +++ b/run.sh @@ -20,38 +20,23 @@ thread=64 ## d4 #n_r1=~/data/fastq/dataset/zy_wes/s_1.fq #n_r2=~/data/fastq/dataset/zy_wes/s_2.fq -n_r1=~/data/fastq/dataset/zy_wes/45mr1.fq.gz -n_r2=~/data/fastq/dataset/zy_wes/45mr2.fq.gz +#n_r1=~/data/fastq/dataset/zy_wes/45mr1.fq.gz +#n_r2=~/data/fastq/dataset/zy_wes/45mr2.fq.gz ## d5 #n_r1=~/data/fastq/dataset/zy_wgs/45mr1.fq.gz #n_r2=~/data/fastq/dataset/zy_wgs/45mr2.fq.gz #n_r1=~/data/fastq/dataset/zy_wgs/s_1.fq #n_r2=~/data/fastq/dataset/zy_wgs/s_2.fq -#n_r1=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_1.fq -#n_r2=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_2.fq +n_r1=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_1.fq +n_r2=~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_2.fq reference=~/data1/fmt_ref/human_g1k_v37_decoy.fasta #reference=~/reference/bwa/human_g1k_v37_decoy.fasta #reference=~/data/reference/human_g1k_v37_decoy.fasta -#out=./all.sam -#out=./sn.sam -#out=./ssn-x1.sam -out=~/data1/d4-45m.sam -#out=~/data/out1.sam -#out=/dev/null -#out=./na12878.sam -#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ -# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ -# /home/zzh/data/fastq/nm1.fq \ -# /home/zzh/data/fastq/nm2.fq -o /dev/null -#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ -# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ -# /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_1.fq.gz \ -# /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_2.fq.gz \ -# -o /dev/null +out=~/data1/fast-out.sam -time ./fastbwa mem -t $thread -b 256 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ +time ./fastbwa mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ $reference \ $n_r1 \ $n_r2 \