first integration; compiled, but NOT TESTED!

This commit is contained in:
Heng Li 2014-08-07 11:59:37 -04:00
parent 705aa53894
commit b5415ba23d
13 changed files with 3499 additions and 4 deletions

View File

@ -4,7 +4,7 @@ CFLAGS= -g -Wall -Wno-unused-function -O2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o intel_ext.o ed_intrav.o
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
is.o bwtindex.o bwape.o kopen.o pemerge.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
@ -14,18 +14,21 @@ INCLUDES=
LIBS= -lm -lz -lpthread
SUBDIRS= .
.SUFFIXES:.c .o .cc
.SUFFIXES:.c .o .cc .cpp
.c.o:
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
.cpp.o:
$(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
all:$(PROG)
bwa:libbwa.a $(AOBJS) main.o
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
$(CXX) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
bwamem-lite:libbwa.a example.o
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)
$(CXX) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)
libbwa.a:$(LOBJS)
$(AR) -csru $@ $(LOBJS)

View File

@ -15,6 +15,8 @@
#include "ksort.h"
#include "utils.h"
#include "intel_ext.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
@ -664,6 +666,21 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
tmp = s->rbeg - rmax[0];
rs = malloc(tmp);
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
if (opt->flag & MEM_F_FASTFLT) {
	/* Fast path: let the edit-distance filter try to settle the left
	 * extension before the banded extension loop below is run.  It is
	 * handed the reversed reference flank rs (tmp bases), the query flank
	 * qs (s->qbeg bases), the seed length as initial score and no end
	 * bonus. */
	int alignedQLen, alignedRlen, score;
	float confidence;
	intel_filter(rs, tmp, qs, s->qbeg, s->len, 0, &alignedQLen, &alignedRlen, &score, &confidence);
	if (confidence == 1.0) {
		if (alignedQLen == s->qbeg) {
			/* Whole query flank aligned.  The comparison has to use the
			 * query length given to the filter (s->qbeg); tmp is the
			 * length of the reference flank. */
			a->score = a->truesc = score * opt->a;
			a->qb = 0; a->rb = s->rbeg - alignedRlen;
			goto end_left_extend;
		}
		if (alignedQLen == 0) {
			/* Nothing aligned: the region starts at the seed. */
			a->score = a->truesc = s->len * opt->a;
			a->qb = s->qbeg; a->rb = s->rbeg;
			goto end_left_extend;
		}
		/* Any other answer has not filled in `a`; fall through to the
		 * banded extension instead of skipping it. */
	}
}
for (i = 0; i < MAX_BAND_TRY; ++i) {
int prev = a->score;
aw[0] = opt->w << i;
@ -684,6 +701,8 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
a->qb = 0, a->rb = s->rbeg - gtle;
a->truesc = gscore;
}
end_left_extend:
free(qs); free(rs);
} else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
@ -692,6 +711,20 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
qe = s->qbeg + s->len;
re = s->rbeg + s->len - rmax[0];
assert(re >= 0);
if (opt->flag & MEM_F_FASTFLT) {
	/* Fast path for the right extension: the filter sees the reference
	 * tail starting at re, the query tail starting at qe (l_query - qe
	 * bases), the current score scaled back by the match score, and no
	 * end bonus. */
	int alignedQLen, alignedRlen, score;
	float confidence;
	intel_filter(rseq + re, rmax[1] - rmax[0] - re, query + qe, l_query - qe, a->score / opt->a, 0, &alignedQLen, &alignedRlen, &score, &confidence);
	if (confidence == 1.0) {
		if (alignedQLen == l_query - qe) {
			/* Whole query tail aligned.  Compare against the query length
			 * given to the filter, not the left-hand flank length tmp. */
			a->score = a->truesc = score * opt->a;
			a->qe = l_query; a->re = rmax[0] + re + alignedRlen;
			goto end_right_extend;
		}
		if (alignedQLen == 0) {
			/* Nothing aligned: the region ends where the seed ends.  This
			 * sets the END coordinate; a->rb belongs to the left extension
			 * and must not be touched here. */
			a->qe = qe; a->re = rmax[0] + re;
			goto end_right_extend;
		}
		/* Any other answer has not filled in a->qe / a->re; fall through
		 * to the banded extension instead of skipping it. */
	}
}
for (i = 0; i < MAX_BAND_TRY; ++i) {
int prev = a->score;
aw[1] = opt->w << i;
@ -715,6 +748,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
} else a->qe = l_query, a->re = s->rbeg + s->len;
if (bwa_verbose >= 4) printf("*** Added alignment region: [%d,%d) <=> [%ld,%ld); score=%d; {left,right}_bandwidth={%d,%d}\n", a->qb, a->qe, (long)a->rb, (long)a->re, a->score, aw[0], aw[1]);
end_right_extend:
// compute seedcov
for (i = 0, a->seedcov = 0; i < c->n; ++i) {
const mem_seed_t *t = &c->seeds[i];

View File

@ -19,6 +19,7 @@ typedef struct __smem_i smem_i;
#define MEM_F_SELF_OVLP 0x40
#define MEM_F_ALN_REG 0x80
#define MEM_F_SOFTCLIP 0x200
#define MEM_F_FASTFLT 0x400
typedef struct {
int a, b; // match score and mismatch penalty

115
ed_fine.h 100644
View File

@ -0,0 +1,115 @@
/*
The MIT License (MIT)
Copyright (c) 2014 Intel Corp.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdint.h>
#include <iostream>
// Table-driven counter for one byte.  getCnt() is a plain lookup into lut_;
// the table is only declared here, its definition and the body of init_lut()
// live outside this header.
class BitCount8 {
  static uint8_t lut_[256] ;  // one entry per possible byte value

 public:
  // Expected to fill lut_; must have run before getCnt() returns useful values.
  static void init_lut() ;

  // Returns the table entry for bitVec (presumably its number of set bits).
  static int getCnt(uint8_t bitVec) {
    const uint8_t entry = lut_[bitVec] ;
    return entry ;
  }
} ;
// 16-bit counterpart of BitCount8: the count of a 16-bit value is the sum of
// the BitCount8 results for its two bytes.
class BitCount16 {
  static uint8_t lut_[256] ;  // declared, but not read by anything in this header

 public:
  // Defined outside this header.
  static void init_lut() ;

  // Splits bitVec into its high and low byte and adds the two byte counts.
  static int getCnt(uint16_t bitVec) {
    const uint8_t highByte = (uint8_t) (bitVec >> 8) ;
    const uint8_t lowByte = (uint8_t) bitVec ;
    return BitCount8::getCnt(highByte) + BitCount8::getCnt(lowByte) ;
  }
} ;
// Fine-grain alignment for up to the first 16 bases.
// Considers only alignments where qlen=rlen.
//
// update() is called once per candidate prefix length and keeps, for two
// different mismatch weights ("low" and "high"), the best score seen so far
// and the prefix length that produced it.  Both best scores start at 0, so a
// prefix is only recorded when its score is strictly positive.
class FineAlignment16 {
  int queryLen_, endBonus_ ;
  int bestAlignedLenLow_, bestAlignedLenHigh_ ;
  int maxScoreLow_, maxScoreHigh_ ;
  int mismatchWtP1Low_, mismatchWtP1High_ ;  // mismatch weight + 1
  int ambigWtP1_ ;                           // ambiguity weight + 1

 public:
  // The initialisers are listed in declaration order, which is the order in
  // which they actually run.
  FineAlignment16(int queryLen, int endBonus, int mismatchWtLow, int mismatchWtHigh, int ambigWt)
    : queryLen_(queryLen), endBonus_(endBonus),
      bestAlignedLenLow_(0), bestAlignedLenHigh_(0),
      maxScoreLow_(0), maxScoreHigh_(0),
      mismatchWtP1Low_(mismatchWtLow+1), mismatchWtP1High_(mismatchWtHigh+1),
      ambigWtP1_(ambigWt+1) {}

  // Scores the prefix of length partialLen.
  //   UP0, UP1   : bit vectors; only their low partialLen bits are used and the
  //                edit distance is taken as count(UP0) - count(UP1).
  //   partialLen : prefix length being scored (the mask assumes it is <= 16).
  //   ambigCnt   : number of ambiguous positions charged against the score.
  void update(uint16_t UP0, uint16_t UP1, int partialLen, int ambigCnt) {
    uint16_t mask = ((uint32_t)(1) << partialLen) - 1 ; // clear higher bits beyond partialLen
    UP0 &= mask ;
    UP1 &= mask ;

    // With at most 8 bits left after masking, the byte-wide counter suffices.
    int editDist = (partialLen <= 8) ?
      (BitCount8::getCnt((uint8_t)UP0) - BitCount8::getCnt((uint8_t)UP1)) :
      (BitCount16::getCnt(UP0) - BitCount16::getCnt(UP1)) ;

    // The end bonus applies only when the prefix covers the whole query.
    int bonus = (partialLen == queryLen_) ? endBonus_ : 0 ;

    // The "low" score uses the HIGH mismatch weight and vice versa.
    int scoreLow = partialLen - editDist * mismatchWtP1High_ - ambigCnt * ambigWtP1_ + bonus ;
    int scoreHigh = partialLen - editDist * mismatchWtP1Low_ - ambigCnt * ambigWtP1_ + bonus ;

    if (scoreLow > maxScoreLow_) {
      maxScoreLow_ = scoreLow ;
      bestAlignedLenLow_ = partialLen ;
    }
    if (scoreHigh > maxScoreHigh_) {
      maxScoreHigh_ = scoreHigh ;
      bestAlignedLenHigh_ = partialLen ;
    }
  }

  // Best "low" score so far and the prefix length that achieved it.
  void getBestLow(int& alignedLen, int& score) {
    alignedLen = bestAlignedLenLow_ ;
    score = maxScoreLow_ ;
  }

  // Best "high" score so far and the prefix length that achieved it.
  void getBestHigh(int& alignedLen, int& score) {
    alignedLen = bestAlignedLenHigh_ ;
    score = maxScoreHigh_ ;
  }

  // std:: is spelled out so the header does not depend on a using-directive
  // leaking in from whatever was included before it.
  friend std::ostream& operator<< (std::ostream& os, const FineAlignment16& a) {
    os << "{maxScores = (" << a.maxScoreLow_ << ", " << a.maxScoreHigh_ << "); best cols: "
       << a.bestAlignedLenLow_ << ", " << a.bestAlignedLenHigh_ << "}" ;
    return os ;
  }
} ;

1280
ed_intrav.cpp 100644

File diff suppressed because it is too large Load Diff

131
ed_intrav.h 100644
View File

@ -0,0 +1,131 @@
/*
The MIT License (MIT)
Copyright (c) 2014 Intel Corp.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ED_INTRAV_H
#define ED_INTRAV_H
#include "ed_intravED.h"
// Hard-coded scoring parameters.
// costMatrix is a row-major costMatrixRowCnt x costMatrixRowCnt table: +1 on
// the diagonal of the first four rows, -4 for the other pairs among those
// four symbols, and -1 for anything involving the fifth symbol
// (presumably A/C/G/T plus an ambiguous base -- confirm against the caller).
const int costMatrixRowCnt = 5 ;
const int8_t costMatrix[] = {
1, -4, -4, -4, -1,
-4, 1, -4, -4, -1,
-4, -4, 1, -4, -1,
-4, -4, -4, 1, -1,
-1, -1, -1, -1, -1
} ;
// Gap penalties (by their names: gap open and gap extension).
const int gapo = 6 ;
const int gape = 1 ;
// Declared here, defined elsewhere.  Returns an int and reports the aligned
// query/reference lengths through the two reference parameters.  Note that
// the parameters costMatrixRowCnt, costMatrix, gapo and gape shadow the
// constants of the same name above.
int run_ksw_extend(uint8_t* refSeq, int refLen, uint8_t* querySeq, int queryLen,
int bandW, int initScore, int endBonus, int zdrop, int costMatrixRowCnt,
const int8_t* costMatrix, int gapo, int gape,
int& alignedQLen, int& alignedRLen) ;
// Running maxima of query length, reference length and band width.
struct SWFeedback {
  int maxQLen ;
  int maxRLen ;
  int maxBand ;

  // All maxima start at zero.
  SWFeedback(): maxQLen(0), maxRLen(0), maxBand(0) {}

  // Starts from the given values.
  SWFeedback(int qlen, int rlen, int band): maxQLen(qlen), maxRLen(rlen), maxBand(band) {}

  // Raises each stored maximum to the corresponding argument if that is larger.
  void updateMax(int qlen, int rlen, int band) {
    maxQLen = std::max(maxQLen, qlen) ;
    maxRLen = std::max(maxRLen, rlen) ;
    maxBand = std::max(maxBand, band) ;
  }
} ;

// Prints "{maxQLen maxRLen maxBand}".  std:: is spelled out so the header does
// not rely on a using-directive from an earlier include.
inline std::ostream& operator<<(std::ostream& os, const SWFeedback& swfb) {
  os << "{" << swfb.maxQLen << " " << swfb.maxRLen << " " << swfb.maxBand << "}" ;
  return os ;
}
// Per-word distance accumulator layered on an EDVec.
//   BitVec : bit-vector type; must provide shiftProbesRight(int).
//   EDVec  : word-vector type; must provide the static helpers
//            getLastWordIndexFor / getProbeOffsetFor / getDistAt and the
//            arithmetic used below.
// dist_ holds the running distances; mask_ (set once via setWordsAsMask) is
// ANDed with the shifted bit vectors before they are added or subtracted.
template<class BitVec, class EDVec>
class DistVec {
  EDVec dist_, mask_ ;
  int queryLen_, msWordIndex_, probeOffset_ ;

  // Shifts the probe bits of `bits` down by probeOffset_, converts the result
  // to an EDVec and masks it.
  EDVec maskedProbes(const BitVec& bits) {
    return EDVec(bits.shiftProbesRight(probeOffset_)) & mask_ ;
  }

 public:
  // Caches the word index and probe offset that EDVec derives from queryLen.
  // dist_ is left default-constructed until initDist() is called.
  DistVec(int queryLen)
    : queryLen_(queryLen),
      msWordIndex_(EDVec::getLastWordIndexFor(queryLen)),
      probeOffset_(EDVec::getProbeOffsetFor(queryLen))
  {
    mask_.setWordsAsMask() ;
  }

  const EDVec& getVec() const { return dist_ ; }

  // Seeds dist_ from the query length and the weight (initU0 - initU1).
  void initDist(int initU0, int initU1) {
    const int distWt = initU0 - initU1 ;
    dist_.setWordsAsDist(queryLen_, distWt) ;
  }

  // dist_ += probes(LP0); dist_ -= probes(LP1).
  void addDist (const BitVec& LP0, const BitVec& LP1) {
    const EDVec plus = maskedProbes(LP0) ;
    dist_ += plus ;
    const EDVec minus = maskedProbes(LP1) ;
    dist_ -= minus ;
  }

  // Same delta as addDist(LP0, LP1), returned instead of applied.
  EDVec getDeltaDistVec(const BitVec& LP0, const BitVec& LP1) {
    EDVec delta = maskedProbes(LP0) ;
    delta -= maskedProbes(LP1) ;
    return delta ;
  }

  // Applies a delta previously obtained from getDeltaDistVec().
  void addDist (const EDVec& deltaDist) { dist_ += deltaDist ; }

  // Number of words in use: index of the last word plus one.
  int getValidDistCnt() const { return msWordIndex_ + 1 ; }

  int getProbeColumn(int index) const {
    const int distAt = EDVec::getDistAt(index, queryLen_) ;
    return distAt - 1 ;
  }

  int getDist(int index) const { return (int) dist_.getWord(index) ; }

  // Distance stored in the last word in use.
  int getTotalDist() const { return (int) dist_.getWord(msWordIndex_) ; }

  // Word-wise minimum with another DistVec.
  void setMin (const DistVec& other) { dist_.setMin(other.dist_) ; }

  friend std::ostream& operator<< (std::ostream& os, const DistVec& d) {
    os << d.dist_ ;
    return os ;
  }
} ;
#endif // #ifndef ED_INTRAV_H

308
ed_intrav64.h 100644
View File

@ -0,0 +1,308 @@
/*
The MIT License (MIT)
Copyright (c) 2014 Intel Corp.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _ED_INTRAV64_H
#define _ED_INTRAV64_H
#include <stdint.h>
#include <stdlib.h>
#include <limits>
// Bit vector held in a single 64-bit word.
class BitVec64 {
  uint64_t bitV ;

  // Wraps a raw word.  Private: only the members below and the friend classes
  // listed at the end can build a BitVec64 from an integer.
  explicit BitVec64(uint64_t val): bitV(val) {}

 public:
  // Starts with every bit cleared.
  BitVec64(): bitV(0x0) {}

  inline bool getLSB() const {
    return (bitV & 0x1) != 0 ;
  }

  inline uint16_t getLow16Bits() const {
    return (uint16_t) bitV ;
  }

  inline void setAllOnes () {
    bitV = ~((uint64_t) 0x0) ;
  }

  inline void setAllZeroes () {
    bitV = 0x0 ;
  }

  inline void setAllBits (bool bit) {
    if (bit) setAllOnes() ;
    else setAllZeroes() ;
  }

  // Bit 0 becomes `bit`, every other bit is cleared.
  inline void setLSBClearRest (bool bit) {
    bitV = bit ? 0x1 : 0x0 ;
  }

  inline void setBit(int bitIndex, bool bit) {
    const uint64_t mask = ((uint64_t)0x1) << bitIndex ;
    if (bit) bitV |= mask ;
    else bitV &= ~mask ;
  }

  // Shifts left by one and puts newBit into bit 0.
  inline void shiftLeftAndInsert (bool newBit) {
    shiftLeft() ;
    if (newBit) bitV |= 0x1 ;
  }

  inline void shiftLeft () {
    bitV = bitV << 1 ;
  }

  // Shifts right by probeOffset+1: the carry-out bit sits one position above
  // probeOffset.
  inline BitVec64 shiftProbesRight(int probeOffset) const {
    const int shift = probeOffset + 1 ;
    return BitVec64(bitV >> shift) ;
  }

  // There is only one word, so index is ignored.
  void setWord (int index, uint64_t w) {
    (void) index ;
    bitV = w ;
  }

  // ORs newBit into position index; a false newBit leaves the vector unchanged.
  inline void orBitAtPosition (bool newBit, int index) {
    if (newBit) bitV |= ((uint64_t)0x1) << index ;
  }

  inline bool getMSB() const {
    return (bitV >> 63) != 0 ;
  }

  inline BitVec64 operator~ () const {
    return BitVec64(~bitV) ;
  }

  inline BitVec64 operator& (const BitVec64& other) const {
    return BitVec64(bitV & other.bitV) ;
  }

  // this AND (NOT other)
  inline BitVec64 andnot (const BitVec64& other) const {
    return BitVec64(bitV & ~other.bitV) ;
  }

  inline BitVec64 operator| (const BitVec64& other) const {
    return BitVec64(bitV | other.bitV) ;
  }

  inline BitVec64 operator^ (const BitVec64& other) const {
    return BitVec64(bitV ^ other.bitV) ;
  }

  // Plain 64-bit addition (wraps on overflow).
  inline BitVec64 operator+ (const BitVec64& other) const {
    return BitVec64(bitV + other.bitV) ;
  }

  // Queries must be shorter than 64 bases to fit this vector.
  static bool isQueryLengthOk(int qlen) {
    return qlen < 64 ;
  }

  bool getBit (int index) const {
    return ((bitV >> index) & 0x1) != 0 ;
  }

  friend std::ostream& operator<< (std::ostream& os, const BitVec64& b) {
    os << b.bitV ;
    return os ;
  }

  // These classes read bitV directly.
  friend class EDVec64 ;
  friend class EDVec128Every16 ;
  friend class EDVec64Every8 ;
  friend class EDVec32Every4 ;
  friend class EDVec16Every1 ;
} ;
class EDVec64 {
int64_t val_ ;
public:
EDVec64() {}
explicit EDVec64(int val): val_(val) {}
explicit EDVec64(const BitVec64& bv): val_(bv.bitV) {}
inline void setAll(int val) {
val_ = val ;
}
static int16_t getMaxWordVal() { return std::numeric_limits<int16_t>::max() ;}
inline int getWord(int index) const {
return (int) val_ ;
}
bool allLessThanOrEqualTo(const EDVec64& other) const {
return val_ <= other.val_ ;
}
inline void setWords (int* vals) {
val_ = vals[0] ;
}
inline void setFirstWord(int val) {
val_ = val ;
}
inline void setWordsAsMask () {
val_ = 0x1 ;
}
static int getLastWordIndexFor(int queryLen) {
return 0 ;
}
static int getProbeOffsetFor (int queryLen) {
return queryLen-1 ;
}
static int getBitOffsetFor(int baseIndex) {
return baseIndex ;
}
void setWordsAsEndBonus(int queryLen, int endBonus) {
val_ = endBonus ;
}
void setWordsAsDist (int queryLen, int distWt) {
val_ = queryLen * distWt ;
}
void setWordsAsBadScore(int queryLen, int thr, int infScore) {
val_ = infScore ;
}
static int getDistAt(int wordIndex, int queryLen) {
return queryLen ;
}
void setMin(const EDVec64& other) {
this->val_ = std::min(this->val_, other.val_) ;
}
void setMin(const EDVec64& other, EDVec64& bestIndices, const EDVec64& otherIndices) {
if (other.val_ < this->val_) {
this->val_ = other.val_ ;
bestIndices.val_ = otherIndices.val_ ;
}
}
void setMax(const EDVec64& other, EDVec64& bestIndices, const EDVec64& otherIndices) {
if (other.val_ > this->val_) {
this->val_ = other.val_ ;
bestIndices.val_ = otherIndices.val_ ;
}
}
uint16_t setMaxAndReturnFlag(const EDVec64& other, EDVec64& bestIndices, const EDVec64& otherIndices) {
if (other.val_ > this->val_) {
this->val_ = other.val_ ;
bestIndices.val_ = otherIndices.val_ ;
return 0x1 ;
}
return 0x0 ;
}
void addThirdIfFirstGTSecond(const EDVec64& first, const EDVec64& second,
const EDVec64& third, const EDVec64& zero) {
if (first.val_ > second.val_)
this->val_ += third.val_ ;
}
/*
EDVec64 abs(const EDVec64& other) const {
return EDVec64(labs((uint64_t)this->val_ - other.val_)) ;
}
EDVec64 abs() const {
return EDVec64(labs(this->val_)) ;
}
*/
EDVec64 subSat(const EDVec64& other) const {
int64_t diff = this->val_ - other.val_ ;
return EDVec64(diff < 0 ? 0 : diff) ;
}
inline EDVec64 shiftBitsRightWithinWords(int shiftVal) {
return EDVec64(this->val_ >> shiftVal) ;
}
inline void shiftWordsLeftByOne() {
val_ = 0 ;
}
inline bool operator == (const EDVec64& other) const {
return this->val_ == other.val_ ;
}
inline EDVec64& operator += (const EDVec64& other) {
this->val_ += other.val_ ;
return *this ;
}
inline EDVec64& operator -= (const EDVec64& other) {
this->val_ -= other.val_ ;
return *this ;
}
inline EDVec64 operator+ (const EDVec64& other) const {
return EDVec64(this->val_ + other.val_) ;
}
inline EDVec64 operator- (const EDVec64& other) const {
return EDVec64(this->val_ - other.val_) ;
}
inline EDVec64 operator* (const EDVec64& other) const {
return EDVec64(this->val_ * other.val_) ;
}
inline EDVec64 operator& (const EDVec64& other) const {
return EDVec64(this->val_ & other.val_) ;
}
static int PERIOD() {
return 64 ;
}
static int WORD_CNT() {
return 1 ;
}
friend std::ostream& operator<< (std::ostream& os, const EDVec64& v) {
os << v.val_ ;
return os ;
}
} ;
#endif // #ifndef _ED_INTRAV64_H

408
ed_intrav64x2.h 100644
View File

@ -0,0 +1,408 @@
/*
The MIT License (MIT)
Copyright (c) 2014 Intel Corp.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _ED_INTRAV64x2_H
#define _ED_INTRAV64x2_H
#include <stdint.h>
#include <stdlib.h>
#include <iostream>
using namespace std ;
// Bit vector spread over two 64-bit words.
// Logical bits 0..62 live in bitV[0] and logical bit 63 onwards in bitV[1]
// (see getBit).  Bit 63 of bitV[0] is not a payload bit: shifts copy it into
// bit 0 of bitV[1], and operator+ uses it as the carry into bitV[1].
class BitVec64x2 {
  uint64_t bitV[2] ;

  // Builds a vector from its two raw words (internal helper).
  BitVec64x2(uint64_t low, uint64_t high) {
    bitV[0] = low ;
    bitV[1] = high ;
  }

 public:
  // Starts with every bit cleared.
  BitVec64x2() {
    bitV[0] = bitV[1] = 0x0 ;
  }

  inline bool getLSB() const {
    return (bitV[0] & 0x1) != 0 ;
  }

  inline uint16_t getLow16Bits() const {
    return (uint16_t) bitV[0] ;
  }

  inline void setAllOnes () {
    bitV[0] = bitV[1] = ~((uint64_t) 0x0) ;
  }

  inline void setAllZeroes () {
    bitV[0] = bitV[1] = 0x0 ;
  }

  inline void setAllBits (bool bit) {
    if (bit) setAllOnes() ;
    else setAllZeroes() ;
  }

  // Bit 0 becomes `bit`, everything else is cleared.
  inline void setLSBClearRest (bool bit) {
    bitV[0] = bit ? 0x1 : 0x0 ;
    bitV[1] = 0x0 ;
  }

  // shiftLeft() followed by placing newBit into bit 0.
  inline void shiftLeftAndInsert (bool newBit) {
    shiftLeft() ;
    if (newBit) bitV[0] |= 0x1 ;
  }

  // Shifts both words left by one; the bit that lands in position 63 of the
  // low word is copied into bit 0 of the high word.
  inline void shiftLeft () {
    bitV[0] = bitV[0] << 1 ;
    bitV[1] = (bitV[1] << 1) | (bitV[0] >> 63) ;
  }

  inline BitVec64x2 shiftProbesRight(int probeOffset) const {
    const uint64_t low63 = 0x7FFFFFFFFFFFFFFF ;
    // Replace bit 63 of the low word with bit 0 of the high word before
    // shifting, so it is available when probeOffset reaches the top of the
    // low word.
    const uint64_t lowWithNext = (bitV[0] & low63) | (bitV[1] << 63) ;
    // Low word: shift by probeOffset.  High word: the carry-out bit sits at
    // probeOffset+1.
    return BitVec64x2(lowWithNext >> probeOffset, bitV[1] >> (probeOffset+1)) ;
  }

  void setWord (int index, uint64_t w) {
    bitV[index] = w ;
  }

  inline BitVec64x2 operator~ () const {
    return BitVec64x2(~bitV[0], ~bitV[1]) ;
  }

  inline BitVec64x2 operator& (const BitVec64x2& other) const {
    return BitVec64x2(bitV[0] & other.bitV[0], bitV[1] & other.bitV[1]) ;
  }

  // this AND (NOT other)
  inline BitVec64x2 andnot (const BitVec64x2& other) const {
    return BitVec64x2(bitV[0] & ~other.bitV[0], bitV[1] & ~other.bitV[1]) ;
  }

  inline BitVec64x2 operator| (const BitVec64x2& other) const {
    return BitVec64x2(bitV[0] | other.bitV[0], bitV[1] | other.bitV[1]) ;
  }

  inline BitVec64x2 operator^ (const BitVec64x2& other) const {
    return BitVec64x2(bitV[0] ^ other.bitV[0], bitV[1] ^ other.bitV[1]) ;
  }

  // Adds the low 63 bits of both low words; the carry appears in bit 63 of
  // that sum and is added into the sum of the high words.
  inline BitVec64x2 operator+ (const BitVec64x2& other) const {
    const uint64_t low63 = 0x7FFFFFFFFFFFFFFF ;
    const uint64_t lowSum = (bitV[0] & low63) + (other.bitV[0] & low63) ;
    const uint64_t carry = (lowSum >> 63) & 0x1 ;
    return BitVec64x2(lowSum, bitV[1] + other.bitV[1] + carry) ;
  }

  // Queries of up to 126 bases are accepted.
  static bool isQueryLengthOk(int qlen) {
    return !(qlen > 126) ;
  }

  // Logical bit lookup: indices below 63 come from the low word, the rest
  // from the high word at (bitIndex - 63).
  bool getBit (int bitIndex) const {
    if (bitIndex < 63)
      return ((bitV[0] >> bitIndex) & 0x1) != 0 ;
    return ((bitV[1] >> (bitIndex - 63)) & 0x1) != 0 ;
  }

  friend std::ostream& operator<< (std::ostream& os, const BitVec64x2& b) {
    os << "{" << b.bitV[0] << ", " << b.bitV[1] << "}" ;
    return os ;
  }

  // These classes read bitV directly.
  friend class EDVec64x2 ;
  friend class EDVec128Every16 ;
} ;
// Two-word companion of BitVec64x2: a pair of signed 64-bit values.
// Word 0 belongs to query positions 1..63 and word 1 to the positions after
// that (see getLastWordIndexFor / getBitOffsetFor).
class EDVec64x2 {
  int64_t vals_[2] ;

 public:
  // Leaves both words uninitialised.
  EDVec64x2() {}

  // Sets both words to val.
  explicit EDVec64x2(int val) {
    vals_[0] = val ;
    vals_[1] = val ;
  }

  // Takes over the raw words of a BitVec64x2.
  explicit EDVec64x2(const BitVec64x2& bv) {
    vals_[0] = bv.bitV[0] ;
    vals_[1] = bv.bitV[1] ;
  }

  inline void set(int val, int index) {
    vals_[index] = val ;
  }

  // Largest value reported for a word: the int16_t maximum.
  static int16_t getMaxWordVal() { return std::numeric_limits<int16_t>::max() ;}

  inline int getWord(int index) const {
    return (int) vals_[index] ;
  }

  // True when both words are <= the matching words of other.
  bool allLessThanOrEqualTo(const EDVec64x2& other) const {
    return vals_[0] <= other.vals_[0] && vals_[1] <= other.vals_[1] ;
  }

  // Reads vals[0] and vals[1].
  inline void setWords (int* vals) {
    vals_[0] = vals[0] ;
    vals_[1] = vals[1] ;
  }

  inline void setFirstWord(int val) {
    vals_[0] = val ;
  }

  inline void setWordsAsMask () {
    vals_[0] = 0x1 ;
    vals_[1] = 0x1 ;
  }

  // Index of the word holding the last query base: 0 for queryLen <= 63,
  // otherwise 1.  Takes the query LENGTH; the -1 is applied here.
  static int getLastWordIndexFor(int queryLen) {
    int baseIndex = queryLen - 1 ;
    return (baseIndex < 63) ? 0 : 1 ;
  }

  static int getProbeOffsetFor (int queryLen) {
    return (queryLen-1) % 63 ;
  }

  // Position of base baseIndex inside its word.
  static int getBitOffsetFor(int baseIndex) {
    return (baseIndex < 63) ? baseIndex : (baseIndex - 63) ;
  }

  // The end bonus goes into the word that holds the last query base; the
  // other word is zeroed.
  void setWordsAsEndBonus(int queryLen, int endBonus) {
    vals_[0] = vals_[1] = 0 ;
    if (queryLen <= 63)
      vals_[0] = endBonus ;
    else
      vals_[1] = endBonus ;
  }

  // Word 0 gets (offset+1)*distWt and word 1 gets (63+offset+1)*distWt, where
  // offset is the bit offset of the last query base.
  inline void setWordsAsDist(int queryLen, int distWt) {
    int bitOffset = getBitOffsetFor(queryLen-1) ;
    vals_[0] = (bitOffset + 1) * distWt ;
    vals_[1] = (63 + bitOffset + 1) * distWt ;
  }

  // Both words get thr, then the word holding the last query base gets
  // infScore.  getLastWordIndexFor() already converts a length to a base
  // index, so it is given queryLen itself; passing queryLen-1 picked word 0
  // for queryLen == 64, disagreeing with setWordsAsEndBonus().
  void setWordsAsBadScore(int queryLen, int thr, int infScore) {
    vals_[0] = thr ;
    vals_[1] = thr ;
    vals_[getLastWordIndexFor(queryLen)] = infScore ;
  }

  // NOTE(review): this uses PERIOD() == 64 while the words carry 63 bases
  // each and setWordsAsDist() adds 1 to the offset; the value for word 0 looks
  // one lower than the setWordsAsDist() convention. Left unchanged -- confirm
  // against the callers.
  static int getDistAt(int wordIndex, int queryLen) {
    return wordIndex * PERIOD() + getProbeOffsetFor(queryLen) ;
  }

  // Word-wise minimum.
  void setMin(const EDVec64x2& other) {
    this->vals_[0] = std::min(this->vals_[0], other.vals_[0]) ;
    this->vals_[1] = std::min(this->vals_[1], other.vals_[1]) ;
  }

  // Word-wise minimum; where other wins, the matching word of otherIndices is
  // copied into bestIndices.
  void setMin(const EDVec64x2& other, EDVec64x2& bestIndices, const EDVec64x2& otherIndices) {
    for (int i=0; i < 2; ++i) {
      if (other.vals_[i] < this->vals_[i]) {
	this->vals_[i] = other.vals_[i] ;
	bestIndices.vals_[i] = otherIndices.vals_[i] ;
      }
    }
  }

  // Word-wise maximum with the same index bookkeeping.
  void setMax(const EDVec64x2& other, EDVec64x2& bestIndices, const EDVec64x2& otherIndices) {
    for (int i=0; i < 2; ++i) {
      if (other.vals_[i] > this->vals_[i]) {
	this->vals_[i] = other.vals_[i] ;
	bestIndices.vals_[i] = otherIndices.vals_[i] ;
      }
    }
  }

  // As setMax; bit i of the result is set when word i was replaced.
  uint16_t setMaxAndReturnFlag(const EDVec64x2& other, EDVec64x2& bestIndices, const EDVec64x2& otherIndices) {
    uint16_t flag = 0x0 ;
    for (int i=0; i < 2; ++i) {
      if (other.vals_[i] > this->vals_[i]) {
	this->vals_[i] = other.vals_[i] ;
	bestIndices.vals_[i] = otherIndices.vals_[i] ;
	flag |= (0x1 << i) ;
      }
    }
    return flag ;
  }

  // Per word: adds third when first > second.  `zero` is not used here.
  void addThirdIfFirstGTSecond(const EDVec64x2& first, const EDVec64x2& second,
			       const EDVec64x2& third, const EDVec64x2& zero) {
    for (int i=0; i < 2; ++i) {
      if (first.vals_[i] > second.vals_[i])
	this->vals_[i] += third.vals_[i] ;
    }
  }

  // Word-wise subtraction clamped at zero.
  EDVec64x2 subSat(const EDVec64x2& other) const {
    EDVec64x2 r ;
    int64_t diff0 = this->vals_[0] - other.vals_[0] ;
    int64_t diff1 = this->vals_[1] - other.vals_[1] ;
    r.vals_[0] = (diff0 < 0 ? 0 : diff0) ;
    r.vals_[1] = (diff1 < 0 ? 0 : diff1) ;
    return r ;
  }

  inline EDVec64x2 shiftBitsRightWithinWords(int shiftVal) {
    EDVec64x2 r ;
    r.vals_[0] = this->vals_[0] >> shiftVal ;
    r.vals_[1] = this->vals_[1] >> shiftVal ;
    return r ;
  }

  // Word 0 moves into word 1; word 0 becomes 0.
  inline void shiftWordsLeftByOne() {
    vals_[1] = vals_[0] ;
    vals_[0] = 0 ;
  }

  inline bool operator == (const EDVec64x2& other) const {
    return this->vals_[0] == other.vals_[0] && this->vals_[1] == other.vals_[1] ;
  }

  inline EDVec64x2& operator += (const EDVec64x2& other) {
    this->vals_[0] += other.vals_[0] ;
    this->vals_[1] += other.vals_[1] ;
    return *this ;
  }

  inline EDVec64x2& operator -= (const EDVec64x2& other) {
    this->vals_[0] -= other.vals_[0] ;
    this->vals_[1] -= other.vals_[1] ;
    return *this ;
  }

  inline EDVec64x2 operator+ (const EDVec64x2& other) const {
    EDVec64x2 r ;
    r.vals_[0] = this->vals_[0] + other.vals_[0] ;
    r.vals_[1] = this->vals_[1] + other.vals_[1] ;
    return r ;
  }

  inline EDVec64x2 operator- (const EDVec64x2& other) const {
    EDVec64x2 r ;
    r.vals_[0] = this->vals_[0] - other.vals_[0] ;
    r.vals_[1] = this->vals_[1] - other.vals_[1] ;
    return r ;
  }

  inline EDVec64x2 operator* (const EDVec64x2& other) const {
    EDVec64x2 r ;
    r.vals_[0] = this->vals_[0] * other.vals_[0] ;
    r.vals_[1] = this->vals_[1] * other.vals_[1] ;
    return r ;
  }

  inline EDVec64x2 operator& (const EDVec64x2& other) const {
    EDVec64x2 r ;
    r.vals_[0] = this->vals_[0] & other.vals_[0] ;
    r.vals_[1] = this->vals_[1] & other.vals_[1] ;
    return r ;
  }

  static int PERIOD() {
    return 64 ;
  }

  static int WORD_CNT() {
    return 2 ;
  }

  friend std::ostream& operator<< (std::ostream& os, const EDVec64x2& v) {
    os << "{" << v.vals_[0] << ", " << v.vals_[1] << "}" ;
    return os ;
  }
} ;
#endif // #ifndef _ED_INTRAV64x2_H

1079
ed_intravED.h 100644

File diff suppressed because it is too large Load Diff

View File

@ -11,6 +11,7 @@
#include "utils.h"
#include "kseq.h"
#include "utils.h"
#include "intel_ext.h"
KSEQ_DECLARE(gzFile)
extern unsigned char nst_nt4_table[256];
@ -51,6 +52,7 @@ int main_mem(int argc, char *argv[])
memset(pes, 0, 4 * sizeof(mem_pestat_t));
for (i = 0; i < 4; ++i) pes[i].failed = 1;
intel_init();
opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {

56
filter.h 100644
View File

@ -0,0 +1,56 @@
// One-time set-up of the static arrays; call it once per program.
void init_ed_dist() ;

// extend_with_edit_dist -- the filtering function.
//
// Inputs:
//   refSeq, refLen      reference sequence
//   querySeq, queryLen  query sequence
//   initScore           initial score (i.e. h0)
//   endBonus            extra score if the whole query is aligned
// Outputs:
//   alignedQLen         alignment length in the query
//   alignedRLen         alignment length in the reference
//   score               alignment score
//   confidence          for now either 0.0 or 1.0: no / full confidence in
//                       the outputs
// How to read the result:
//   confidence == 0.0                         partial alignment; ksw_extend(...)
//                                             has to be rerun
//   confidence == 1.0, alignedQLen == queryLen  full alignment
//   confidence == 1.0, alignedQLen == 0         no alignment
// Note: the cost matrix and the gap penalties are hardcoded for now.
void extend_with_edit_dist(uint8_t* refSeq,
			   int refLen,
			   uint8_t* querySeq,
			   int queryLen,
			   int initScore,
			   int endBonus,
			   int& alignedQLen,
			   int& alignedRLen,
			   int& score,
			   float& confidence) ;

// filter_and_extend -- filter first, extend only if needed.
//
// Inputs:
//   refSeq, refLen      reference sequence
//   querySeq, queryLen  query sequence
//   initScore           initial score (i.e. h0)
//   endBonus            extra score if the whole query is aligned
//   zdrop               zdrop value handed on to ksw_extend
// Outputs:
//   alignedQLen         alignment length in the query
//   alignedRLen         alignment length in the reference
//   score               alignment score
// Behaviour:
//   The filtering function is called internally first.  An obvious result is
//   returned directly; otherwise ksw_extend() is called with feedback from the
//   filter and its result is returned.
// Notes:
//   The cost matrix and the gap penalties are hardcoded for now.
//   ksw_extend(...) is assumed to be linked in; in this version it is defined
//   in the bwa_extend.cpp file.
void filter_and_extend(uint8_t* refSeq,
		       int refLen,
		       uint8_t* querySeq,
		       int queryLen,
		       int initScore,
		       int endBonus,
		       int zdrop,
		       int& alignedQLen,
		       int& alignedRLen,
		       int& score) ;

// C-linkage declaration required by filter_and_extend(); the definition is in
// bwa_extend.cpp.
extern "C" int ksw_extend(int qlen, const uint8_t *query,
			  int tlen, const uint8_t *target,
			  int m, const int8_t *mat,
			  int gapo, int gape, int w,
			  int end_bonus, int zdrop, int h0,
			  int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) ;

60
intel_ext.cpp 100644
View File

@ -0,0 +1,60 @@
#include "intel_ext.h"
// Forward declarations of the C++ routines wrapped by the C entry points
// further down in this file.

// One-time set-up of the static arrays; call it once per program.
void init_ed_dist() ;

// extend_with_edit_dist -- the filtering function.
//
// Inputs:
//   refSeq, refLen      reference sequence
//   querySeq, queryLen  query sequence
//   initScore           initial score (i.e. h0)
//   endBonus            extra score if the whole query is aligned
// Outputs:
//   alignedQLen         alignment length in the query
//   alignedRLen         alignment length in the reference
//   score               alignment score
//   confidence          for now either 0.0 or 1.0: no / full confidence in
//                       the outputs
// How to read the result:
//   confidence == 0.0                         partial alignment; ksw_extend(...)
//                                             has to be rerun
//   confidence == 1.0, alignedQLen == queryLen  full alignment
//   confidence == 1.0, alignedQLen == 0         no alignment
// Note: the cost matrix and the gap penalties are hardcoded for now.
void extend_with_edit_dist(uint8_t* refSeq,
			   int refLen,
			   uint8_t* querySeq,
			   int queryLen,
			   int initScore,
			   int endBonus,
			   int& alignedQLen,
			   int& alignedRLen,
			   int& score,
			   float& confidence) ;

// filter_and_extend -- filter first, extend only if needed.
//
// Inputs:
//   refSeq, refLen      reference sequence
//   querySeq, queryLen  query sequence
//   initScore           initial score (i.e. h0)
//   endBonus            extra score if the whole query is aligned
//   zdrop               zdrop value handed on to ksw_extend
// Outputs:
//   alignedQLen         alignment length in the query
//   alignedRLen         alignment length in the reference
//   score               alignment score
// Behaviour:
//   The filtering function is called internally first.  An obvious result is
//   returned directly; otherwise ksw_extend() is called with feedback from the
//   filter and its result is returned.
// Notes:
//   The cost matrix and the gap penalties are hardcoded for now.
//   ksw_extend(...) is assumed to be linked in; in this version it is defined
//   in the bwa_extend.cpp file.
void filter_and_extend(uint8_t* refSeq,
		       int refLen,
		       uint8_t* querySeq,
		       int queryLen,
		       int initScore,
		       int endBonus,
		       int zdrop,
		       int& alignedQLen,
		       int& alignedRLen,
		       int& score) ;
// C entry point for the one-time set-up: simply forwards to init_ed_dist().
void intel_init()
{
	init_ed_dist() ;
	return ;
}
// C entry point for the filter: forwards to extend_with_edit_dist(), turning
// the four output pointers into the references that function expects.  All
// four pointers are dereferenced unconditionally.
void intel_filter(uint8_t *refSeq, int refLen, uint8_t *querySeq, int queryLen, int initScore, int endBonus,
		int *alignedQLen, int *alignedRLen, int *score, float *confidence)
{
	int &qLenOut = *alignedQLen ;
	int &rLenOut = *alignedRLen ;
	int &scoreOut = *score ;
	float &confidenceOut = *confidence ;
	extend_with_edit_dist(refSeq, refLen, querySeq, queryLen, initScore, endBonus,
			      qLenOut, rLenOut, scoreOut, confidenceOut) ;
}

18
intel_ext.h 100644
View File

@ -0,0 +1,18 @@
#ifndef INTEL_EXT_H
#define INTEL_EXT_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* One-time set-up; the implementation forwards to init_ed_dist().
 * Declared with (void) so that C callers get a real prototype. */
void intel_init(void);

/* Runs the edit-distance filter; the implementation forwards to
 * extend_with_edit_dist().  The four trailing pointers are outputs and are
 * dereferenced unconditionally, so none of them may be NULL. */
void intel_filter(uint8_t *refSeq, int refLen, uint8_t *querySeq, int queryLen, int initScore, int endBonus,
		int *alignedQLen, int *alignedRLen, int *score, float *confidence);

#ifdef __cplusplus
}
#endif

#endif