r795: intel's ksw_extend() replacement

This commit is contained in:
Heng Li 2014-08-07 13:43:07 -04:00
parent b5415ba23d
commit d9dfab2ba2
9 changed files with 47 additions and 90 deletions

View File

@ -1,6 +1,7 @@
CC= gcc
#CC= clang --analyze
CFLAGS= -g -Wall -Wno-unused-function -O2
CXXFLAGS= -g -Wall -Wno-unused-variable -O3
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
@ -20,7 +21,7 @@ SUBDIRS= .
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
.cpp.o:
$(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
$(CXX) -c $(CXXFLAGS) $(INCLUDES) $< -o $@
all:$(PROG)
@ -33,11 +34,14 @@ bwamem-lite:libbwa.a example.o
libbwa.a:$(LOBJS)
$(AR) -csru $@ $(LOBJS)
ed_intrav.o:ed_intrav.cpp
$(CXX) -c $(CXXFLAGS) $(INCLUDES) -DSW_FILTER_AND_EXTEND -o $@ $<
clean:
rm -f gmon.out *.o a.out $(PROG) *~ *.a
depend:
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c )
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c *.cpp )
# DO NOT DELETE THIS LINE -- make depend depends on it.
@ -46,7 +50,7 @@ bamlite.o: bamlite.h malloc_wrap.h
bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h
bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kstring.h malloc_wrap.h kseq.h
bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
bwamem.o: ksort.h utils.h kbtree.h
bwamem.o: ksort.h utils.h intel_ext.h kbtree.h
bwamem_extra.o: bwa.h bntseq.h bwt.h bwamem.h kstring.h malloc_wrap.h
bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h
bwamem_pair.o: utils.h ksw.h
@ -71,6 +75,7 @@ bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h
bwtsw2_pair.o: malloc_wrap.h ksw.h
example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h malloc_wrap.h
fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h malloc_wrap.h utils.h kseq.h
fastmap.o: intel_ext.h
is.o: malloc_wrap.h
kopen.o: malloc_wrap.h
kstring.o: kstring.h malloc_wrap.h
@ -78,5 +83,7 @@ ksw.o: ksw.h malloc_wrap.h
main.o: kstring.h malloc_wrap.h utils.h
malloc_wrap.o: malloc_wrap.h
pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h
test-extend.o: ksw.h
utils.o: utils.h ksort.h malloc_wrap.h kseq.h
ed_intrav.o: ed_intrav64.h ed_intrav64x2.h ed_intrav.h ed_intravED.h
ed_intrav.o: ed_fine.h
intel_ext.o: intel_ext.h

View File

@ -666,20 +666,9 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
tmp = s->rbeg - rmax[0];
rs = malloc(tmp);
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
if (opt->flag & MEM_F_FASTFLT) {
int alignedQLen, alignedRlen, score;
float confidence;
intel_filter(rs, tmp, qs, s->qbeg, s->len, 0, &alignedQLen, &alignedRlen, &score, &confidence);
if (confidence == 1.0) {
if (alignedQLen == tmp) {
a->score = a->truesc = score * opt->a;
a->qb = 0; a->rb = s->rbeg - alignedRlen;
} else if (alignedQLen == 0) {
a->score = a->truesc = s->len * opt->a;
a->qb = s->qbeg; a->rb = s->rbeg;
}
goto end_left_extend;
}
if (opt->flag & MEM_F_FASTEXT) {
a->score = intel_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
goto end_left_extend;
}
for (i = 0; i < MAX_BAND_TRY; ++i) {
int prev = a->score;
@ -711,19 +700,9 @@ end_left_extend:
qe = s->qbeg + s->len;
re = s->rbeg + s->len - rmax[0];
assert(re >= 0);
if (opt->flag & MEM_F_FASTFLT) {
int alignedQLen, alignedRlen, score;
float confidence;
intel_filter(rseq + re, rmax[1] - rmax[0] - re, query + qe, l_query - qe, a->score / opt->a, 0, &alignedQLen, &alignedRlen, &score, &confidence);
if (confidence == 1.0) {
if (alignedQLen == tmp) {
a->score = a->truesc = score * opt->a;
a->qe = l_query; a->re = rmax[0] + re + alignedRlen;
} else if (alignedQLen == 0) {
a->qe = qe; a->rb = rmax[0] + re;
}
goto end_right_extend;
}
if (opt->flag & MEM_F_FASTEXT) {
a->score = intel_extend(l_query - qe, (uint8_t*)query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
goto end_right_extend;
}
for (i = 0; i < MAX_BAND_TRY; ++i) {
int prev = a->score;

View File

@ -20,6 +20,7 @@ typedef struct __smem_i smem_i;
#define MEM_F_ALN_REG 0x80
#define MEM_F_SOFTCLIP 0x200
#define MEM_F_FASTFLT 0x400
#define MEM_F_FASTEXT 0x800
typedef struct {
int a, b; // match score and mismatch penalty

View File

@ -17,7 +17,6 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
#include "ed_intrav.h"
#include "ed_fine.h"
#include "filter.h"
//using namespace std ;
@ -38,6 +37,9 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
#define SW_FEEDBACK
#endif
extern "C" {
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) ;
}
int run_ksw_extend(uint8_t* refSeq, int refLen, uint8_t* querySeq, int queryLen,
int bandW, int initScore, int endBonus, int zdrop, int costMatrixRowCnt,

View File

@ -55,7 +55,7 @@ int main_mem(int argc, char *argv[])
intel_init();
opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {
while ((c = getopt(argc, argv, "XepaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == 'x') mode = optarg;
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
@ -72,6 +72,7 @@ int main_mem(int argc, char *argv[])
else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP;
else if (c == 'F') opt->flag |= MEM_F_ALN_REG;
else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP;
else if (c == 'X') opt->flag |= MEM_F_FASTEXT;
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
else if (c == 'v') bwa_verbose = atoi(optarg);

View File

@ -1,56 +0,0 @@
// Needs to be called only once per program to set up static arrays
void init_ed_dist() ;
// The filtering function:
// Inputs: Reference and query sequences,
// Initial score (i.e. h0),
// endBonus (i.e. the extra score if the whole query is aligned)
// Outputs:
// Alignment length in query
// Alignment length in reference
// Alignment score
// Confidence: For now, it's either 0.0 or 1.0, corresponding to no/full confidence in outputs
// Usage:
// If confidence == 0.0: Partial alignment - need to rerun ksw_extend(...)
// If confidence == 1.0
// if alignedQLen == queryLen: Full alignment
// if alignedQLen == 0: No alignment
// Notes:
// For now, the costMatrix and gap penalties are hardcoded.
//
void extend_with_edit_dist(uint8_t* refSeq, int refLen, uint8_t* querySeq, int queryLen,
int initScore, int endBonus,
int& alignedQLen, int& alignedRLen, int& score, float& confidence) ;
// Filter-and-extend function:
// Inputs: Reference and query sequences,
// Initial score (i.e. h0),
// endBonus (i.e. the extra score if the whole query is aligned)
// zdrop value passed to ksw_extend
// Outputs:
// Alignment length in query
// Alignment length in reference
// Alignment score
// Behavior:
// The filtering function will be called internally first.
// If there is an obvious result, it will be returned.
// If not, ksw_extend() will be called with feedback from filtering function,
// and its result will be returned.
// Notes:
// For now, the costMatrix and gap penalties are hardcoded.
// It is assumed that ksw_extend(...) function is linked. In this version,
// it is defined in bwa_extend.cpp file.
void filter_and_extend(uint8_t* refSeq, int refLen, uint8_t* querySeq, int queryLen,
int initScore, int endBonus, int zdrop,
int& alignedQLen, int& alignedRLen, int& score) ;
// Declaration needed for filter_and_extend(), defined in bwa_extend.cpp
extern "C" {
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) ;
}

View File

@ -1,5 +1,9 @@
#include "intel_ext.h"
extern "C" {
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
}
// Needs to be called only once per program to set up static arrays
void init_ed_dist() ;
@ -56,5 +60,19 @@ void intel_init()
void intel_filter(uint8_t *refSeq, int refLen, uint8_t *querySeq, int queryLen, int initScore, int endBonus,
int *alignedQLen, int *alignedRLen, int *score, float *confidence)
{
if (queryLen < INTEL_MIN_LEN || queryLen > INTEL_MAX_LEN) {
*confidence = 0.0;
return;
}
extend_with_edit_dist(refSeq, refLen, querySeq, queryLen, initScore, endBonus, *alignedQLen, *alignedRLen, *score, *confidence);
}
int intel_extend(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off)
{
if (qlen >= INTEL_MIN_LEN && qlen <= INTEL_MAX_LEN && tlen >= INTEL_MIN_LEN && tlen <= INTEL_MAX_LEN) {
int score, a = mat[0];
filter_and_extend(target, tlen, query, qlen, h0/a, !!(end_bonus > 0), zdrop/a, *qle, *tle, score);
*max_off = w; *gtle = *tle; *gscore = score;
return score * a;
} else return ksw_extend(qlen, query, tlen, target, m, mat, gapo, gape, w, end_bonus, zdrop, h0, qle, tle, gtle, gscore, max_off);
}

View File

@ -3,6 +3,9 @@
#include <stdint.h>
#define INTEL_MIN_LEN 16
#define INTEL_MAX_LEN 126
#ifdef __cplusplus
extern "C" {
#endif
@ -10,6 +13,8 @@ extern "C" {
void intel_init();
void intel_filter(uint8_t *refSeq, int refLen, uint8_t *querySeq, int queryLen, int initScore, int endBonus,
int *alignedQLen, int *alignedRLen, int *score, float *confidence);
int intel_extend(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape,
int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
#ifdef __cplusplus
}

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r789"
#define PACKAGE_VERSION "0.7.10-r795-dirty"
#endif
int bwa_fa2pac(int argc, char *argv[]);