添加了fmt_idx文件,开始改进seed过程

This commit is contained in:
zzh 2023-12-25 11:11:19 +08:00
parent d636907248
commit 293f3bb80e
6 changed files with 105 additions and 2 deletions

View File

@ -1,7 +1,7 @@
CC= gcc
#CC= clang --analyze
# CFLAGS= -g -Wall -Wno-unused-function -O2
CFLAGS= -g -Wall -Wno-unused-function #-O2
CFLAGS= -g -Wall -Wno-unused-function -O2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
@ -10,7 +10,8 @@ LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_p
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
bwape.o kopen.o pemerge.o maxk.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
bwtsw2_chain.o fastmap.o bwtsw2_pair.o \
fmt_idx.o
PROG= bwa
INCLUDES=
LIBS= -lm -lz -lpthread

2
bwa.h
View File

@ -30,6 +30,7 @@
#include <stdint.h>
#include "bntseq.h"
#include "bwt.h"
#include "fmt_idx.h"
#define BWA_IDX_BWT 0x1
#define BWA_IDX_BNS 0x2
@ -47,6 +48,7 @@
typedef struct {
bwt_t *bwt; // FM-index
bwtd_t *bwtd;// FMT-index
bntseq_t *bns; // information on the reference sequences
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base

View File

@ -38,6 +38,7 @@
#include "utils.h"
#include "bntseq.h"
#include "kseq.h"
#include "fmt_idx.h"
KSEQ_DECLARE(gzFile)
extern unsigned char nst_nt4_table[256];
@ -390,6 +391,9 @@ int main_mem(int argc, char *argv[])
opt->flag |= MEM_F_PE;
}
}
BuildBwtdFromBwt(aux.idx->bwt, &aux.idx->bwtd);
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);

27
fmt_idx.c 100644
View File

@ -0,0 +1,27 @@
/*
Description: fmt-idxseedfm-index twice search in one time
Copyright : All right reserved by ICT
Author : Zhang Zhonghai
Date : 2023/12/24
*/
#include <stdio.h>
#include <stdlib.h>
#include "fmt_idx.h"
// 创建fmt-index索引数据
void BuildBwtdFromBwt(bwt_t *bwt, bwtd_t **bwtd_p) {
*bwtd_p = (bwtd_t *)malloc(sizeof(bwtd_t));
bwtd_t *bwtd = *bwtd_p;
const int baseLen = 12;
const int kmerSize = 1 << (baseLen << 1);
bwtd->kmer_range = (kmer_range_t *)malloc(kmerSize * sizeof(kmer_range_t));
printf("kmer size: %ld M\n", kmerSize * sizeof(kmer_range_t) / 1024 / 1024);
exit(0);
}

44
fmt_idx.h 100644
View File

@ -0,0 +1,44 @@
/*
Description: fmt-idxseedfm-index twice search in one time
Copyright : All right reserved by ICT
Author : Zhang Zhonghai
Date : 2023/12/24
*/
#ifndef BWA_FMT_IDX_H
#define BWA_FMT_IDX_H
#include <stdint.h>
#include <stddef.h>
#include "bwt.h"
typedef uint64_t bwtint_t;
/*
ignore the first 12 bases, and give the entrence directly
*/
typedef struct {
uint8_t range[9]; // 前36位表示起始行后36位表示结束行fm-index索引行
} kmer_range_t;
typedef struct
{
bwtint_t primary; // S^{-1}(0), or the primary index of BWT
bwtint_t L2[5]; // C(), cumulative count
bwtint_t seq_len; // sequence length
bwtint_t bwt_size; // size of bwt, about seq_len/4
uint32_t *bwt; // BWT
// kmer entry
kmer_range_t *kmer_range;
// occurance array, separated to two parts
uint32_t cnt_table[256];
// suffix array
int sa_intv;
bwtint_t n_sa;
bwtint_t *sa;
} bwtd_t;
// 创建fmt-index索引数据
void BuildBwtdFromBwt(bwt_t *bwt, bwtd_t **bwtd_p);
#endif

25
run.sh 100755
View File

@ -0,0 +1,25 @@
time ./bwa mem -t 1 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
/home/zzh/data/fastq/nm1.fq \
/home/zzh/data/fastq/nm2.fq #-o /dev/null
# time ./bwa mem -t 1 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
# -o /dev/null
# time ./bwa mem -t 64 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
# -o /dev/null
#/public/home/zzh/data/fastq/n1.fq \
#/public/home/zzh/data/fastq/n2.fq \
#/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta \
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq \
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq \
#-o reads_mapping.sam