添加了fmt_idx文件,开始改进seed过程
This commit is contained in:
parent
d636907248
commit
293f3bb80e
5
Makefile
5
Makefile
|
|
@ -1,7 +1,7 @@
|
|||
CC= gcc
|
||||
#CC= clang --analyze
|
||||
# CFLAGS= -g -Wall -Wno-unused-function -O2
|
||||
CFLAGS= -g -Wall -Wno-unused-function #-O2
|
||||
CFLAGS= -g -Wall -Wno-unused-function -O2
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||
|
|
@ -10,7 +10,8 @@ LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_p
|
|||
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
bwape.o kopen.o pemerge.o maxk.o \
|
||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o \
|
||||
fmt_idx.o
|
||||
PROG= bwa
|
||||
INCLUDES=
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
|
|
|||
2
bwa.h
2
bwa.h
|
|
@ -30,6 +30,7 @@
|
|||
#include <stdint.h>
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
#include "fmt_idx.h"
|
||||
|
||||
#define BWA_IDX_BWT 0x1
|
||||
#define BWA_IDX_BNS 0x2
|
||||
|
|
@ -47,6 +48,7 @@
|
|||
|
||||
typedef struct {
|
||||
bwt_t *bwt; // FM-index
|
||||
bwtd_t *bwtd;// FMT-index
|
||||
bntseq_t *bns; // information on the reference sequences
|
||||
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "utils.h"
|
||||
#include "bntseq.h"
|
||||
#include "kseq.h"
|
||||
#include "fmt_idx.h"
|
||||
KSEQ_DECLARE(gzFile)
|
||||
|
||||
extern unsigned char nst_nt4_table[256];
|
||||
|
|
@ -390,6 +391,9 @@ int main_mem(int argc, char *argv[])
|
|||
opt->flag |= MEM_F_PE;
|
||||
}
|
||||
}
|
||||
|
||||
BuildBwtdFromBwt(aux.idx->bwt, &aux.idx->bwtd);
|
||||
|
||||
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
|
||||
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
|
||||
kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
Description: 通过fmt-idx数据结构对seed过程进行加速(fm-index twice search in one time)
|
||||
|
||||
Copyright : All right reserved by ICT
|
||||
|
||||
Author : Zhang Zhonghai
|
||||
Date : 2023/12/24
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "fmt_idx.h"
|
||||
|
||||
// 创建fmt-index索引数据
|
||||
void BuildBwtdFromBwt(bwt_t *bwt, bwtd_t **bwtd_p) {
|
||||
*bwtd_p = (bwtd_t *)malloc(sizeof(bwtd_t));
|
||||
bwtd_t *bwtd = *bwtd_p;
|
||||
|
||||
const int baseLen = 12;
|
||||
const int kmerSize = 1 << (baseLen << 1);
|
||||
|
||||
bwtd->kmer_range = (kmer_range_t *)malloc(kmerSize * sizeof(kmer_range_t));
|
||||
|
||||
printf("kmer size: %ld M\n", kmerSize * sizeof(kmer_range_t) / 1024 / 1024);
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
Description: 通过fmt-idx数据结构对seed过程进行加速(fm-index twice search in one time)
|
||||
|
||||
Copyright : All right reserved by ICT
|
||||
|
||||
Author : Zhang Zhonghai
|
||||
Date : 2023/12/24
|
||||
*/
|
||||
#ifndef BWA_FMT_IDX_H
|
||||
#define BWA_FMT_IDX_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "bwt.h"
|
||||
|
||||
typedef uint64_t bwtint_t;
|
||||
|
||||
/*
|
||||
ignore the first 12 bases, and give the entrence directly
|
||||
*/
|
||||
typedef struct {
|
||||
uint8_t range[9]; // 前36位表示起始行,后36位表示结束行(fm-index索引行)
|
||||
} kmer_range_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bwtint_t primary; // S^{-1}(0), or the primary index of BWT
|
||||
bwtint_t L2[5]; // C(), cumulative count
|
||||
bwtint_t seq_len; // sequence length
|
||||
bwtint_t bwt_size; // size of bwt, about seq_len/4
|
||||
uint32_t *bwt; // BWT
|
||||
// kmer entry
|
||||
kmer_range_t *kmer_range;
|
||||
// occurance array, separated to two parts
|
||||
uint32_t cnt_table[256];
|
||||
// suffix array
|
||||
int sa_intv;
|
||||
bwtint_t n_sa;
|
||||
bwtint_t *sa;
|
||||
} bwtd_t;
|
||||
|
||||
// 创建fmt-index索引数据
|
||||
void BuildBwtdFromBwt(bwt_t *bwt, bwtd_t **bwtd_p);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
time ./bwa mem -t 1 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
||||
/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
||||
/home/zzh/data/fastq/nm1.fq \
|
||||
/home/zzh/data/fastq/nm2.fq #-o /dev/null
|
||||
|
||||
# time ./bwa mem -t 1 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
||||
# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
||||
# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
|
||||
# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
|
||||
# -o /dev/null
|
||||
|
||||
# time ./bwa mem -t 64 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
||||
# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
||||
# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
|
||||
# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
|
||||
# -o /dev/null
|
||||
|
||||
#/public/home/zzh/data/fastq/n1.fq \
|
||||
#/public/home/zzh/data/fastq/n2.fq \
|
||||
#/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta \
|
||||
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq \
|
||||
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq \
|
||||
#-o reads_mapping.sam
|
||||
|
||||
|
||||
Loading…
Reference in New Issue