r795: intel's ksw_extend() replacement
This commit is contained in:
parent
b5415ba23d
commit
d9dfab2ba2
15
Makefile
15
Makefile
|
|
@ -1,6 +1,7 @@
|
|||
CC= gcc
|
||||
#CC= clang --analyze
|
||||
CFLAGS= -g -Wall -Wno-unused-function -O2
|
||||
CXXFLAGS= -g -Wall -Wno-unused-variable -O3
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||
|
|
@ -20,7 +21,7 @@ SUBDIRS= .
|
|||
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
|
||||
$(CXX) -c $(CXXFLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
all:$(PROG)
|
||||
|
||||
|
|
@ -33,11 +34,14 @@ bwamem-lite:libbwa.a example.o
|
|||
libbwa.a:$(LOBJS)
|
||||
$(AR) -csru $@ $(LOBJS)
|
||||
|
||||
ed_intrav.o:ed_intrav.cpp
|
||||
$(CXX) -c $(CXXFLAGS) $(INCLUDES) -DSW_FILTER_AND_EXTEND -o $@ $<
|
||||
|
||||
clean:
|
||||
rm -f gmon.out *.o a.out $(PROG) *~ *.a
|
||||
|
||||
depend:
|
||||
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c )
|
||||
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c *.cpp )
|
||||
|
||||
# DO NOT DELETE THIS LINE -- make depend depends on it.
|
||||
|
||||
|
|
@ -46,7 +50,7 @@ bamlite.o: bamlite.h malloc_wrap.h
|
|||
bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h
|
||||
bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kstring.h malloc_wrap.h kseq.h
|
||||
bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
|
||||
bwamem.o: ksort.h utils.h kbtree.h
|
||||
bwamem.o: ksort.h utils.h intel_ext.h kbtree.h
|
||||
bwamem_extra.o: bwa.h bntseq.h bwt.h bwamem.h kstring.h malloc_wrap.h
|
||||
bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h
|
||||
bwamem_pair.o: utils.h ksw.h
|
||||
|
|
@ -71,6 +75,7 @@ bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h
|
|||
bwtsw2_pair.o: malloc_wrap.h ksw.h
|
||||
example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h malloc_wrap.h
|
||||
fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h malloc_wrap.h utils.h kseq.h
|
||||
fastmap.o: intel_ext.h
|
||||
is.o: malloc_wrap.h
|
||||
kopen.o: malloc_wrap.h
|
||||
kstring.o: kstring.h malloc_wrap.h
|
||||
|
|
@ -78,5 +83,7 @@ ksw.o: ksw.h malloc_wrap.h
|
|||
main.o: kstring.h malloc_wrap.h utils.h
|
||||
malloc_wrap.o: malloc_wrap.h
|
||||
pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h
|
||||
test-extend.o: ksw.h
|
||||
utils.o: utils.h ksort.h malloc_wrap.h kseq.h
|
||||
ed_intrav.o: ed_intrav64.h ed_intrav64x2.h ed_intrav.h ed_intravED.h
|
||||
ed_intrav.o: ed_fine.h
|
||||
intel_ext.o: intel_ext.h
|
||||
|
|
|
|||
33
bwamem.c
33
bwamem.c
|
|
@ -666,20 +666,9 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
tmp = s->rbeg - rmax[0];
|
||||
rs = malloc(tmp);
|
||||
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
|
||||
if (opt->flag & MEM_F_FASTFLT) {
|
||||
int alignedQLen, alignedRlen, score;
|
||||
float confidence;
|
||||
intel_filter(rs, tmp, qs, s->qbeg, s->len, 0, &alignedQLen, &alignedRlen, &score, &confidence);
|
||||
if (confidence == 1.0) {
|
||||
if (alignedQLen == tmp) {
|
||||
a->score = a->truesc = score * opt->a;
|
||||
a->qb = 0; a->rb = s->rbeg - alignedRlen;
|
||||
} else if (alignedQLen == 0) {
|
||||
a->score = a->truesc = s->len * opt->a;
|
||||
a->qb = s->qbeg; a->rb = s->rbeg;
|
||||
}
|
||||
goto end_left_extend;
|
||||
}
|
||||
if (opt->flag & MEM_F_FASTEXT) {
|
||||
a->score = intel_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]);
|
||||
goto end_left_extend;
|
||||
}
|
||||
for (i = 0; i < MAX_BAND_TRY; ++i) {
|
||||
int prev = a->score;
|
||||
|
|
@ -711,19 +700,9 @@ end_left_extend:
|
|||
qe = s->qbeg + s->len;
|
||||
re = s->rbeg + s->len - rmax[0];
|
||||
assert(re >= 0);
|
||||
if (opt->flag & MEM_F_FASTFLT) {
|
||||
int alignedQLen, alignedRlen, score;
|
||||
float confidence;
|
||||
intel_filter(rseq + re, rmax[1] - rmax[0] - re, query + qe, l_query - qe, a->score / opt->a, 0, &alignedQLen, &alignedRlen, &score, &confidence);
|
||||
if (confidence == 1.0) {
|
||||
if (alignedQLen == tmp) {
|
||||
a->score = a->truesc = score * opt->a;
|
||||
a->qe = l_query; a->re = rmax[0] + re + alignedRlen;
|
||||
} else if (alignedQLen == 0) {
|
||||
a->qe = qe; a->rb = rmax[0] + re;
|
||||
}
|
||||
goto end_right_extend;
|
||||
}
|
||||
if (opt->flag & MEM_F_FASTEXT) {
|
||||
a->score = intel_extend(l_query - qe, (uint8_t*)query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]);
|
||||
goto end_right_extend;
|
||||
}
|
||||
for (i = 0; i < MAX_BAND_TRY; ++i) {
|
||||
int prev = a->score;
|
||||
|
|
|
|||
1
bwamem.h
1
bwamem.h
|
|
@ -20,6 +20,7 @@ typedef struct __smem_i smem_i;
|
|||
#define MEM_F_ALN_REG 0x80
|
||||
#define MEM_F_SOFTCLIP 0x200
|
||||
#define MEM_F_FASTFLT 0x400
|
||||
#define MEM_F_FASTEXT 0x800
|
||||
|
||||
typedef struct {
|
||||
int a, b; // match score and mismatch penalty
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|||
|
||||
#include "ed_intrav.h"
|
||||
#include "ed_fine.h"
|
||||
#include "filter.h"
|
||||
|
||||
|
||||
//using namespace std ;
|
||||
|
|
@ -38,6 +37,9 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|||
#define SW_FEEDBACK
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) ;
|
||||
}
|
||||
|
||||
int run_ksw_extend(uint8_t* refSeq, int refLen, uint8_t* querySeq, int queryLen,
|
||||
int bandW, int initScore, int endBonus, int zdrop, int costMatrixRowCnt,
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ int main_mem(int argc, char *argv[])
|
|||
intel_init();
|
||||
opt = mem_opt_init();
|
||||
memset(&opt0, 0, sizeof(mem_opt_t));
|
||||
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "XepaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
||||
else if (c == 'x') mode = optarg;
|
||||
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
||||
|
|
@ -72,6 +72,7 @@ int main_mem(int argc, char *argv[])
|
|||
else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP;
|
||||
else if (c == 'F') opt->flag |= MEM_F_ALN_REG;
|
||||
else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP;
|
||||
else if (c == 'X') opt->flag |= MEM_F_FASTEXT;
|
||||
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
|
||||
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
|
||||
else if (c == 'v') bwa_verbose = atoi(optarg);
|
||||
|
|
|
|||
56
filter.h
56
filter.h
|
|
@ -1,56 +0,0 @@
|
|||
|
||||
// Needs to be called only once per program to set up static arrays
|
||||
void init_ed_dist() ;
|
||||
|
||||
|
||||
|
||||
|
||||
// The filtering function:
|
||||
// Inputs: Reference and query sequences,
|
||||
// Initial score (i.e. h0),
|
||||
// endBonus (i.e. the extra score if the whole query is aligned)
|
||||
// Outputs:
|
||||
// Alignment length in query
|
||||
// Alignment length in reference
|
||||
// Alignment score
|
||||
// Confidence: For now, it's either 0.0 or 1.0, corresponding to no/full confidence in outputs
|
||||
// Usage:
|
||||
// If confidence == 0.0: Partial alignment - need to rerun ksw_extend(...)
|
||||
// If confidence == 1.0
|
||||
// if alignedQLen == queryLen: Full alignment
|
||||
// if alignedQLen == 0: No alignment
|
||||
// Notes:
|
||||
// For now, the costMatrix and gap penalties are hardcoded.
|
||||
//
|
||||
void extend_with_edit_dist(uint8_t* refSeq, int refLen, uint8_t* querySeq, int queryLen,
|
||||
int initScore, int endBonus,
|
||||
int& alignedQLen, int& alignedRLen, int& score, float& confidence) ;
|
||||
|
||||
|
||||
// Filter-and-extend function:
|
||||
// Inputs: Reference and query sequences,
|
||||
// Initial score (i.e. h0),
|
||||
// endBonus (i.e. the extra score if the whole query is aligned)
|
||||
// zdrop value passed to ksw_extend
|
||||
// Outputs:
|
||||
// Alignment length in query
|
||||
// Alignment length in reference
|
||||
// Alignment score
|
||||
// Behavior:
|
||||
// The filtering function will be called internally first.
|
||||
// If there is an obvious result, it will be returned.
|
||||
// If not, ksw_extend() will be called with feedback from filtering function,
|
||||
// and its result will be returned.
|
||||
// Notes:
|
||||
// For now, the costMatrix and gap penalties are hardcoded.
|
||||
// It is assumed that ksw_extend(...) function is linked. In this version,
|
||||
// it is defined in bwa_extend.cpp file.
|
||||
void filter_and_extend(uint8_t* refSeq, int refLen, uint8_t* querySeq, int queryLen,
|
||||
int initScore, int endBonus, int zdrop,
|
||||
int& alignedQLen, int& alignedRLen, int& score) ;
|
||||
|
||||
|
||||
// Declaration needed for filter_and_extend(), defined in bwa_extend.cpp
|
||||
extern "C" {
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) ;
|
||||
}
|
||||
|
|
@ -1,5 +1,9 @@
|
|||
#include "intel_ext.h"
|
||||
|
||||
extern "C" {
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
|
||||
}
|
||||
|
||||
// Needs to be called only once per program to set up static arrays
|
||||
void init_ed_dist() ;
|
||||
|
||||
|
|
@ -56,5 +60,19 @@ void intel_init()
|
|||
void intel_filter(uint8_t *refSeq, int refLen, uint8_t *querySeq, int queryLen, int initScore, int endBonus,
|
||||
int *alignedQLen, int *alignedRLen, int *score, float *confidence)
|
||||
{
|
||||
if (queryLen < INTEL_MIN_LEN || queryLen > INTEL_MAX_LEN) {
|
||||
*confidence = 0.0;
|
||||
return;
|
||||
}
|
||||
extend_with_edit_dist(refSeq, refLen, querySeq, queryLen, initScore, endBonus, *alignedQLen, *alignedRLen, *score, *confidence);
|
||||
}
|
||||
|
||||
int intel_extend(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off)
|
||||
{
|
||||
if (qlen >= INTEL_MIN_LEN && qlen <= INTEL_MAX_LEN && tlen >= INTEL_MIN_LEN && tlen <= INTEL_MAX_LEN) {
|
||||
int score, a = mat[0];
|
||||
filter_and_extend(target, tlen, query, qlen, h0/a, !!(end_bonus > 0), zdrop/a, *qle, *tle, score);
|
||||
*max_off = w; *gtle = *tle; *gscore = score;
|
||||
return score * a;
|
||||
} else return ksw_extend(qlen, query, tlen, target, m, mat, gapo, gape, w, end_bonus, zdrop, h0, qle, tle, gtle, gscore, max_off);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
#define INTEL_MIN_LEN 16
|
||||
#define INTEL_MAX_LEN 126
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
@ -10,6 +13,8 @@ extern "C" {
|
|||
void intel_init();
|
||||
void intel_filter(uint8_t *refSeq, int refLen, uint8_t *querySeq, int queryLen, int initScore, int endBonus,
|
||||
int *alignedQLen, int *alignedRLen, int *score, float *confidence);
|
||||
int intel_extend(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape,
|
||||
int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue