加入了获取reads和extension数据的代码
This commit is contained in:
parent
1e3965cb7d
commit
dd03596997
|
|
@ -2,6 +2,8 @@
|
|||
*.txt
|
||||
*.sam
|
||||
*.log
|
||||
*.fa
|
||||
dataset/
|
||||
bwa
|
||||
test
|
||||
test64
|
||||
|
|
|
|||
18
bwamem.c
18
bwamem.c
|
|
@ -1482,7 +1482,23 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
|
|||
#endif
|
||||
|
||||
#ifdef FILTER_FULL_MATCH
|
||||
if (max_seed_len == len - start_N_num) goto collect_intv_end;
|
||||
if (max_seed_len == len - start_N_num) {
|
||||
|
||||
#ifdef DEBUG_OUTPUT
|
||||
if (start_N_num == 0) {
|
||||
for (i = 0; i < len; ++i)
|
||||
fprintf(gfp1, "%c", "ACGT"[seq[i]]);
|
||||
fprintf(gfp1, "\n");
|
||||
#ifdef SHOW_DATA_PERF
|
||||
gdat[2]++;
|
||||
if (gdat[2] % 100 == 0) {
|
||||
fprintf(stderr, "reads num: %ld\n", gdat[2]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
goto collect_intv_end;
|
||||
}
|
||||
#endif
|
||||
|
||||
// 2. second pass: find MEMs inside a long SMEM
|
||||
|
|
|
|||
30
fastmap.c
30
fastmap.c
|
|
@ -92,7 +92,8 @@ int64_t gdat[100];
|
|||
#endif
|
||||
|
||||
#ifdef DEBUG_OUTPUT
|
||||
FILE * gfp1, *gfp2, *gfp3;
|
||||
FILE * gfp1, *gfp2, *gfp3, *gfp4;
|
||||
FILE *gfq[4], *gft[4], *gfi[4];
|
||||
#endif
|
||||
|
||||
extern unsigned char nst_nt4_table[256];
|
||||
|
|
@ -414,6 +415,18 @@ int main_mem(int argc, char *argv[])
|
|||
gfp1 = fopen("./fp1.txt", "w");
|
||||
gfp2 = fopen("./fp2.txt", "w");
|
||||
gfp3 = fopen("./fp3.txt", "w");
|
||||
gfp4 = fopen("./fp4.txt", "w");
|
||||
char fnbuf[1024];
|
||||
int ii;
|
||||
for (ii = 0; ii < 4; ++ii)
|
||||
{
|
||||
sprintf(fnbuf, "./ext_out/q_%d.fa", ii);
|
||||
gfq[ii] = fopen(fnbuf, "w");
|
||||
sprintf(fnbuf, "./ext_out/t_%d.fa", ii);
|
||||
gft[ii] = fopen(fnbuf, "w");
|
||||
sprintf(fnbuf, "./ext_out/i_%d.txt", ii);
|
||||
gfi[ii] = fopen(fnbuf, "w");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
|
|
@ -776,13 +789,15 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, "time_process_seq: %0.2lf s\n", (time_work_kernel + time_work_sam) * 1.0 / proc_freq);
|
||||
|
||||
fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg_seed_all * 1.0 / proc_freq / opt->n_threads, min_seed_all * 1.0 / proc_freq, max_seed_all * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_seed_1: %0.2lf s\n", avg_seed_1 * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_seed_2: %0.2lf s\n", avg_seed_2 * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_seed_3: %0.2lf s\n", avg_seed_3 * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_sa: %0.2lf s\n", avg_sa * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_chain: %0.2lf s\n", avg_chain * 1.0 / proc_freq / opt->n_threads);
|
||||
|
||||
fprintf(stderr, "time_bsw_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg_bsw_all * 1.0 / proc_freq / opt->n_threads, min_bsw_all * 1.0 / proc_freq, max_bsw_all * 1.0 / proc_freq);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "time_seed_1: %0.2lf s\n", avg_seed_1 * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_seed_2: %0.2lf s\n", avg_seed_2 * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_seed_3: %0.2lf s\n", avg_seed_3 * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_seed_chain: %0.2lf s\n", (avg_seed_all + avg_chain) * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "time_bsw: %0.2lf s\n", avg_bsw * 1.0 / proc_freq / opt->n_threads);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
|
@ -798,6 +813,13 @@ int main_mem(int argc, char *argv[])
|
|||
fclose(gfp1);
|
||||
fclose(gfp2);
|
||||
fclose(gfp3);
|
||||
fclose(gfp4);
|
||||
for (ii = 0; ii < 4; ++ii)
|
||||
{
|
||||
fclose(gfq[ii]);
|
||||
fclose(gft[ii]);
|
||||
fclose(gfi[ii]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -6,8 +6,9 @@
|
|||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
#ifdef DEBUG_OUTPUT
|
||||
extern FILE *gfp1, *gfp2, *gfp3;
|
||||
extern FILE *gfq[4], *gft[4], *gfi[4];
|
||||
#endif
|
||||
|
||||
#define NO_VAL -1
|
||||
|
|
@ -172,6 +173,25 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
|
|||
tmp = fA1; fA1 = fA2; fA2 = tmp; \
|
||||
tmp = mA1; mA1 = mA2; mA2 = tmp;
|
||||
|
||||
void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0, int fnum)
|
||||
{
|
||||
// 写到三个文件里,query.fa,target.fa,每行一个序列,info.txt,包含前缀得分h0,和长度信息qlen,tlen
|
||||
FILE *query_f = gfq[fnum],
|
||||
*target_f = gft[fnum],
|
||||
*info_f = gfi[fnum];
|
||||
const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'};
|
||||
int i;
|
||||
// 处理query
|
||||
for (i = 0; i < qlen; ++i)
|
||||
fprintf(query_f, "%c", seq_map[query[i]]);
|
||||
fprintf(query_f, "\n");
|
||||
// 处理target
|
||||
for (i = 0; i < tlen; ++i)
|
||||
fprintf(target_f, "%c", seq_map[target[i]]);
|
||||
fprintf(target_f, "\n");
|
||||
// 处理其他信息
|
||||
fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0);
|
||||
}
|
||||
|
||||
int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
||||
const uint8_t *query, // read碱基序列
|
||||
|
|
@ -201,6 +221,15 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
|||
|
||||
#ifdef DEBUG_OUTPUT
|
||||
//fprintf(gfp1, "%d\n", qlen);
|
||||
if (qlen <= 30) {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 0);
|
||||
} else if (qlen < 60) {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 1);
|
||||
} else if (qlen < 90) {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 2);
|
||||
} else {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
|
||||
|
|
|
|||
17
run.sh
17
run.sh
|
|
@ -3,12 +3,14 @@ thread=1
|
|||
#n_r2=~/data/fastq/dataset/na12878_wgs_150/bn2.fq
|
||||
#n_r1=~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq
|
||||
#n_r2=~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq
|
||||
#n_r1=~/data/fastq/dataset/na12878_wgs_150/n1.fq
|
||||
#n_r2=~/data/fastq/dataset/na12878_wgs_150/n2.fq
|
||||
#n_r1=~/data/fastq/dataset/na12878_wgs_150/s_1.fq
|
||||
#n_r2=~/data/fastq/dataset/na12878_wgs_150/s_2.fq
|
||||
#n_r1=~/data/fastq/dataset/na12878_wgs_101/na_1.fq
|
||||
#n_r2=~/data/fastq/dataset/na12878_wgs_101/na_2.fq
|
||||
n_r1=~/data/fastq/dataset/zy_wes/sn1.fq
|
||||
n_r2=~/data/fastq/dataset/zy_wes/sn2.fq
|
||||
n_r1=~/data/fastq/dataset/zy_wes/s_1.fq
|
||||
n_r2=~/data/fastq/dataset/zy_wes/s_2.fq
|
||||
#n_r1=~/data/fastq/dataset/zy_wes/bloodgDNA_r1.fastq
|
||||
#n_r2=~/data/fastq/dataset/zy_wes/bloodgDNA_r2.fastq
|
||||
|
||||
|
||||
#n_r1=~/data/fastq/zy/wgs/n1.fq
|
||||
|
|
@ -54,12 +56,13 @@ n_r2=~/data/fastq/dataset/zy_wes/sn2.fq
|
|||
#n_r2=~/fastq/diff_r2.fq
|
||||
#n_r1=~/fastq/d_r1.fq
|
||||
#n_r2=~/fastq/d_r2.fq
|
||||
reference=~/data/reference/human_g1k_v37_decoy.fasta
|
||||
reference=~/reference/human_g1k_v37_decoy.fasta
|
||||
#reference=~/data/reference/human_g1k_v37_decoy.fasta
|
||||
#out=./all.sam
|
||||
#out=./sn.sam
|
||||
#out=./ssn-x1.sam
|
||||
out=./out.sam
|
||||
#out=/dev/null
|
||||
#out=./out.sam
|
||||
out=/dev/null
|
||||
#out=./na12878.sam
|
||||
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
||||
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
||||
|
|
|
|||
5
utils.h
5
utils.h
|
|
@ -31,7 +31,7 @@
|
|||
#include <stdio.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#define USE_RDTSC 0
|
||||
#define USE_RDTSC 1
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
extern uint64_t time_process_data, time_read, time_write, time_compute,
|
||||
|
|
@ -48,7 +48,8 @@ extern int64_t gdat[100];
|
|||
#endif
|
||||
|
||||
#ifdef DEBUG_OUTPUT
|
||||
extern FILE *gfp1, *gfp2, *gfp3;
|
||||
extern FILE *gfp1, *gfp2, *gfp3, *gfp4;
|
||||
extern FILE *gfq[4], *gft[4], *gfi[4];
|
||||
#endif
|
||||
|
||||
#undef MAX
|
||||
|
|
|
|||
Loading…
Reference in New Issue