加入了获取reads和extension数据的代码
This commit is contained in:
parent
1e3965cb7d
commit
dd03596997
|
|
@ -2,6 +2,8 @@
|
||||||
*.txt
|
*.txt
|
||||||
*.sam
|
*.sam
|
||||||
*.log
|
*.log
|
||||||
|
*.fa
|
||||||
|
dataset/
|
||||||
bwa
|
bwa
|
||||||
test
|
test
|
||||||
test64
|
test64
|
||||||
|
|
|
||||||
18
bwamem.c
18
bwamem.c
|
|
@ -1482,7 +1482,23 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FILTER_FULL_MATCH
|
#ifdef FILTER_FULL_MATCH
|
||||||
if (max_seed_len == len - start_N_num) goto collect_intv_end;
|
if (max_seed_len == len - start_N_num) {
|
||||||
|
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (start_N_num == 0) {
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
fprintf(gfp1, "%c", "ACGT"[seq[i]]);
|
||||||
|
fprintf(gfp1, "\n");
|
||||||
|
#ifdef SHOW_DATA_PERF
|
||||||
|
gdat[2]++;
|
||||||
|
if (gdat[2] % 100 == 0) {
|
||||||
|
fprintf(stderr, "reads num: %ld\n", gdat[2]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
goto collect_intv_end;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 2. second pass: find MEMs inside a long SMEM
|
// 2. second pass: find MEMs inside a long SMEM
|
||||||
|
|
|
||||||
30
fastmap.c
30
fastmap.c
|
|
@ -92,7 +92,8 @@ int64_t gdat[100];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG_OUTPUT
|
#ifdef DEBUG_OUTPUT
|
||||||
FILE * gfp1, *gfp2, *gfp3;
|
FILE * gfp1, *gfp2, *gfp3, *gfp4;
|
||||||
|
FILE *gfq[4], *gft[4], *gfi[4];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
extern unsigned char nst_nt4_table[256];
|
||||||
|
|
@ -414,6 +415,18 @@ int main_mem(int argc, char *argv[])
|
||||||
gfp1 = fopen("./fp1.txt", "w");
|
gfp1 = fopen("./fp1.txt", "w");
|
||||||
gfp2 = fopen("./fp2.txt", "w");
|
gfp2 = fopen("./fp2.txt", "w");
|
||||||
gfp3 = fopen("./fp3.txt", "w");
|
gfp3 = fopen("./fp3.txt", "w");
|
||||||
|
gfp4 = fopen("./fp4.txt", "w");
|
||||||
|
char fnbuf[1024];
|
||||||
|
int ii;
|
||||||
|
for (ii = 0; ii < 4; ++ii)
|
||||||
|
{
|
||||||
|
sprintf(fnbuf, "./ext_out/q_%d.fa", ii);
|
||||||
|
gfq[ii] = fopen(fnbuf, "w");
|
||||||
|
sprintf(fnbuf, "./ext_out/t_%d.fa", ii);
|
||||||
|
gft[ii] = fopen(fnbuf, "w");
|
||||||
|
sprintf(fnbuf, "./ext_out/i_%d.txt", ii);
|
||||||
|
gfi[ii] = fopen(fnbuf, "w");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SHOW_PERF
|
#ifdef SHOW_PERF
|
||||||
|
|
@ -776,13 +789,15 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, "time_process_seq: %0.2lf s\n", (time_work_kernel + time_work_sam) * 1.0 / proc_freq);
|
fprintf(stderr, "time_process_seq: %0.2lf s\n", (time_work_kernel + time_work_sam) * 1.0 / proc_freq);
|
||||||
|
|
||||||
fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg_seed_all * 1.0 / proc_freq / opt->n_threads, min_seed_all * 1.0 / proc_freq, max_seed_all * 1.0 / proc_freq);
|
fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg_seed_all * 1.0 / proc_freq / opt->n_threads, min_seed_all * 1.0 / proc_freq, max_seed_all * 1.0 / proc_freq);
|
||||||
fprintf(stderr, "time_seed_1: %0.2lf s\n", avg_seed_1 * 1.0 / proc_freq / opt->n_threads);
|
|
||||||
fprintf(stderr, "time_seed_2: %0.2lf s\n", avg_seed_2 * 1.0 / proc_freq / opt->n_threads);
|
|
||||||
fprintf(stderr, "time_seed_3: %0.2lf s\n", avg_seed_3 * 1.0 / proc_freq / opt->n_threads);
|
|
||||||
fprintf(stderr, "time_sa: %0.2lf s\n", avg_sa * 1.0 / proc_freq / opt->n_threads);
|
fprintf(stderr, "time_sa: %0.2lf s\n", avg_sa * 1.0 / proc_freq / opt->n_threads);
|
||||||
fprintf(stderr, "time_chain: %0.2lf s\n", avg_chain * 1.0 / proc_freq / opt->n_threads);
|
fprintf(stderr, "time_chain: %0.2lf s\n", avg_chain * 1.0 / proc_freq / opt->n_threads);
|
||||||
|
|
||||||
fprintf(stderr, "time_bsw_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg_bsw_all * 1.0 / proc_freq / opt->n_threads, min_bsw_all * 1.0 / proc_freq, max_bsw_all * 1.0 / proc_freq);
|
fprintf(stderr, "time_bsw_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg_bsw_all * 1.0 / proc_freq / opt->n_threads, min_bsw_all * 1.0 / proc_freq, max_bsw_all * 1.0 / proc_freq);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
fprintf(stderr, "time_seed_1: %0.2lf s\n", avg_seed_1 * 1.0 / proc_freq / opt->n_threads);
|
||||||
|
fprintf(stderr, "time_seed_2: %0.2lf s\n", avg_seed_2 * 1.0 / proc_freq / opt->n_threads);
|
||||||
|
fprintf(stderr, "time_seed_3: %0.2lf s\n", avg_seed_3 * 1.0 / proc_freq / opt->n_threads);
|
||||||
|
fprintf(stderr, "time_seed_chain: %0.2lf s\n", (avg_seed_all + avg_chain) * 1.0 / proc_freq / opt->n_threads);
|
||||||
fprintf(stderr, "time_bsw: %0.2lf s\n", avg_bsw * 1.0 / proc_freq / opt->n_threads);
|
fprintf(stderr, "time_bsw: %0.2lf s\n", avg_bsw * 1.0 / proc_freq / opt->n_threads);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
|
||||||
|
|
@ -798,6 +813,13 @@ int main_mem(int argc, char *argv[])
|
||||||
fclose(gfp1);
|
fclose(gfp1);
|
||||||
fclose(gfp2);
|
fclose(gfp2);
|
||||||
fclose(gfp3);
|
fclose(gfp3);
|
||||||
|
fclose(gfp4);
|
||||||
|
for (ii = 0; ii < 4; ++ii)
|
||||||
|
{
|
||||||
|
fclose(gfq[ii]);
|
||||||
|
fclose(gft[ii]);
|
||||||
|
fclose(gfi[ii]);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,9 @@
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
|
||||||
#ifdef SHOW_PERF
|
#ifdef DEBUG_OUTPUT
|
||||||
extern FILE *gfp1, *gfp2, *gfp3;
|
extern FILE *gfp1, *gfp2, *gfp3;
|
||||||
|
extern FILE *gfq[4], *gft[4], *gfi[4];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define NO_VAL -1
|
#define NO_VAL -1
|
||||||
|
|
@ -172,6 +173,25 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
|
||||||
tmp = fA1; fA1 = fA2; fA2 = tmp; \
|
tmp = fA1; fA1 = fA2; fA2 = tmp; \
|
||||||
tmp = mA1; mA1 = mA2; mA2 = tmp;
|
tmp = mA1; mA1 = mA2; mA2 = tmp;
|
||||||
|
|
||||||
|
void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0, int fnum)
|
||||||
|
{
|
||||||
|
// 写到三个文件里,query.fa,target.fa,每行一个序列,info.txt,包含前缀得分h0,和长度信息qlen,tlen
|
||||||
|
FILE *query_f = gfq[fnum],
|
||||||
|
*target_f = gft[fnum],
|
||||||
|
*info_f = gfi[fnum];
|
||||||
|
const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'};
|
||||||
|
int i;
|
||||||
|
// 处理query
|
||||||
|
for (i = 0; i < qlen; ++i)
|
||||||
|
fprintf(query_f, "%c", seq_map[query[i]]);
|
||||||
|
fprintf(query_f, "\n");
|
||||||
|
// 处理target
|
||||||
|
for (i = 0; i < tlen; ++i)
|
||||||
|
fprintf(target_f, "%c", seq_map[target[i]]);
|
||||||
|
fprintf(target_f, "\n");
|
||||||
|
// 处理其他信息
|
||||||
|
fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0);
|
||||||
|
}
|
||||||
|
|
||||||
int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
||||||
const uint8_t *query, // read碱基序列
|
const uint8_t *query, // read碱基序列
|
||||||
|
|
@ -201,6 +221,15 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
||||||
|
|
||||||
#ifdef DEBUG_OUTPUT
|
#ifdef DEBUG_OUTPUT
|
||||||
//fprintf(gfp1, "%d\n", qlen);
|
//fprintf(gfp1, "%d\n", qlen);
|
||||||
|
if (qlen <= 30) {
|
||||||
|
write_query_target_sequence(qlen, query, tlen, target, h0, 0);
|
||||||
|
} else if (qlen < 60) {
|
||||||
|
write_query_target_sequence(qlen, query, tlen, target, h0, 1);
|
||||||
|
} else if (qlen < 90) {
|
||||||
|
write_query_target_sequence(qlen, query, tlen, target, h0, 2);
|
||||||
|
} else {
|
||||||
|
write_query_target_sequence(qlen, query, tlen, target, h0, 3);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
|
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
|
||||||
|
|
|
||||||
17
run.sh
17
run.sh
|
|
@ -3,12 +3,14 @@ thread=1
|
||||||
#n_r2=~/data/fastq/dataset/na12878_wgs_150/bn2.fq
|
#n_r2=~/data/fastq/dataset/na12878_wgs_150/bn2.fq
|
||||||
#n_r1=~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq
|
#n_r1=~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq
|
||||||
#n_r2=~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq
|
#n_r2=~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq
|
||||||
#n_r1=~/data/fastq/dataset/na12878_wgs_150/n1.fq
|
#n_r1=~/data/fastq/dataset/na12878_wgs_150/s_1.fq
|
||||||
#n_r2=~/data/fastq/dataset/na12878_wgs_150/n2.fq
|
#n_r2=~/data/fastq/dataset/na12878_wgs_150/s_2.fq
|
||||||
#n_r1=~/data/fastq/dataset/na12878_wgs_101/na_1.fq
|
#n_r1=~/data/fastq/dataset/na12878_wgs_101/na_1.fq
|
||||||
#n_r2=~/data/fastq/dataset/na12878_wgs_101/na_2.fq
|
#n_r2=~/data/fastq/dataset/na12878_wgs_101/na_2.fq
|
||||||
n_r1=~/data/fastq/dataset/zy_wes/sn1.fq
|
n_r1=~/data/fastq/dataset/zy_wes/s_1.fq
|
||||||
n_r2=~/data/fastq/dataset/zy_wes/sn2.fq
|
n_r2=~/data/fastq/dataset/zy_wes/s_2.fq
|
||||||
|
#n_r1=~/data/fastq/dataset/zy_wes/bloodgDNA_r1.fastq
|
||||||
|
#n_r2=~/data/fastq/dataset/zy_wes/bloodgDNA_r2.fastq
|
||||||
|
|
||||||
|
|
||||||
#n_r1=~/data/fastq/zy/wgs/n1.fq
|
#n_r1=~/data/fastq/zy/wgs/n1.fq
|
||||||
|
|
@ -54,12 +56,13 @@ n_r2=~/data/fastq/dataset/zy_wes/sn2.fq
|
||||||
#n_r2=~/fastq/diff_r2.fq
|
#n_r2=~/fastq/diff_r2.fq
|
||||||
#n_r1=~/fastq/d_r1.fq
|
#n_r1=~/fastq/d_r1.fq
|
||||||
#n_r2=~/fastq/d_r2.fq
|
#n_r2=~/fastq/d_r2.fq
|
||||||
reference=~/data/reference/human_g1k_v37_decoy.fasta
|
reference=~/reference/human_g1k_v37_decoy.fasta
|
||||||
|
#reference=~/data/reference/human_g1k_v37_decoy.fasta
|
||||||
#out=./all.sam
|
#out=./all.sam
|
||||||
#out=./sn.sam
|
#out=./sn.sam
|
||||||
#out=./ssn-x1.sam
|
#out=./ssn-x1.sam
|
||||||
out=./out.sam
|
#out=./out.sam
|
||||||
#out=/dev/null
|
out=/dev/null
|
||||||
#out=./na12878.sam
|
#out=./na12878.sam
|
||||||
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
||||||
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
||||||
|
|
|
||||||
5
utils.h
5
utils.h
|
|
@ -31,7 +31,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
|
||||||
#define USE_RDTSC 0
|
#define USE_RDTSC 1
|
||||||
|
|
||||||
#ifdef SHOW_PERF
|
#ifdef SHOW_PERF
|
||||||
extern uint64_t time_process_data, time_read, time_write, time_compute,
|
extern uint64_t time_process_data, time_read, time_write, time_compute,
|
||||||
|
|
@ -48,7 +48,8 @@ extern int64_t gdat[100];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG_OUTPUT
|
#ifdef DEBUG_OUTPUT
|
||||||
extern FILE *gfp1, *gfp2, *gfp3;
|
extern FILE *gfp1, *gfp2, *gfp3, *gfp4;
|
||||||
|
extern FILE *gfq[4], *gft[4], *gfi[4];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef MAX
|
#undef MAX
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue