r329: ditch stdaln.{c,h}; no changes to bwa-mem
stdaln.{c,h} was written ten years ago. Its local and SW extension code are
actually buggy (though that rarely happens and usually does not affect the
results too much). ksw.{c,h} is more concise, potentially faster, less buggy,
and richer in features.
This commit is contained in:
parent
bb37e14d02
commit
98f8966750
6
Makefile
6
Makefile
|
|
@ -4,7 +4,7 @@ CXXFLAGS= $(CFLAGS)
|
|||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
|
||||
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
|
||||
AOBJS= QSufSort.o bwt_gen.o stdaln.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
is.o bwtindex.o bwape.o kopen.o \
|
||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
||||
|
|
@ -48,8 +48,8 @@ fastmap.o:bwt.h bwamem.h
|
|||
bwtaln.o:bwt.h bwtaln.h kseq.h
|
||||
bwtgap.o:bwtgap.h bwtaln.h bwt.h
|
||||
|
||||
bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
|
||||
bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
|
||||
bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h
|
||||
bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h
|
||||
bwtsw2_main.o:bwtsw2.h
|
||||
|
||||
clean:
|
||||
|
|
|
|||
41
bwape.c
41
bwape.c
|
|
@ -8,9 +8,9 @@
|
|||
#include "kvec.h"
|
||||
#include "bntseq.h"
|
||||
#include "utils.h"
|
||||
#include "stdaln.h"
|
||||
#include "bwase.h"
|
||||
#include "bwa.h"
|
||||
#include "ksw.h"
|
||||
|
||||
typedef struct {
|
||||
int n;
|
||||
|
|
@ -397,16 +397,17 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
|
|||
#define SW_MIN_MAPQ 17
|
||||
|
||||
// cnt = n_mm<<16 | n_gapo<<8 | n_gape
|
||||
bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen,
|
||||
int *n_cigar, uint32_t *_cnt)
|
||||
bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen, int *n_cigar, uint32_t *_cnt)
|
||||
{
|
||||
kswr_t r;
|
||||
uint32_t *cigar32 = 0;
|
||||
bwa_cigar_t *cigar = 0;
|
||||
ubyte_t *ref_seq;
|
||||
bwtint_t k, x, y, l;
|
||||
int path_len, ret, subo;
|
||||
AlnParam ap = aln_param_bwa;
|
||||
path_t *path, *p;
|
||||
int xtra;
|
||||
int8_t mat[25];
|
||||
|
||||
bwa_fill_scmat(1, 3, mat);
|
||||
// check whether there are too many N's
|
||||
if (reglen < SW_MIN_MATCH_LEN || (int64_t)l_pac - *beg < len) return 0;
|
||||
for (k = 0, x = 0; k < len; ++k)
|
||||
|
|
@ -417,15 +418,19 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
|||
ref_seq = (ubyte_t*)calloc(reglen, 1);
|
||||
for (k = *beg, l = 0; l < reglen && k < l_pac; ++k)
|
||||
ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
|
||||
path = (path_t*)calloc(l+len, sizeof(path_t));
|
||||
|
||||
// do alignment
|
||||
ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, &subo);
|
||||
if (ret < 0 || subo == ret) { // no hit or tandem hits
|
||||
free(path); free(cigar); free(ref_seq); *n_cigar = 0;
|
||||
xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0);
|
||||
r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0);
|
||||
ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
|
||||
cigar = (bwa_cigar_t*)cigar32;
|
||||
for (k = 0; k < *n_cigar; ++k)
|
||||
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
||||
|
||||
if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score) { // poor hit or tandem hits
|
||||
free(cigar); free(ref_seq); *n_cigar = 0;
|
||||
return 0;
|
||||
}
|
||||
cigar = bwa_aln_path2cigar(path, path_len, n_cigar);
|
||||
|
||||
// check whether the alignment is good enough
|
||||
for (k = 0, x = y = 0; k < *n_cigar; ++k) {
|
||||
|
|
@ -435,17 +440,14 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
|||
else y += __cigar_len(c);
|
||||
}
|
||||
if (x < SW_MIN_MATCH_LEN || y < SW_MIN_MATCH_LEN) { // not good enough
|
||||
free(path); free(cigar); free(ref_seq);
|
||||
free(cigar); free(ref_seq);
|
||||
*n_cigar = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
{ // update cigar and coordinate;
|
||||
int start, end;
|
||||
p = path + path_len - 1;
|
||||
*beg += (p->i? p->i : 1) - 1;
|
||||
start = (p->j? p->j : 1) - 1;
|
||||
end = path->j;
|
||||
int start = r.qb, end = r.qe + 1;
|
||||
*beg += r.tb;
|
||||
cigar = (bwa_cigar_t*)realloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2));
|
||||
if (start) {
|
||||
memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar));
|
||||
|
|
@ -462,8 +464,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
|||
{ // set *cnt
|
||||
int n_mm, n_gapo, n_gape;
|
||||
n_mm = n_gapo = n_gape = 0;
|
||||
p = path + path_len - 1;
|
||||
x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0;
|
||||
x = r.tb; y = r.qb;
|
||||
for (k = 0; k < *n_cigar; ++k) {
|
||||
bwa_cigar_t c = cigar[k];
|
||||
if (__cigar_op(c) == FROM_M) {
|
||||
|
|
@ -479,7 +480,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
|||
*_cnt = (uint32_t)n_mm<<16 | n_gapo<<8 | n_gape;
|
||||
}
|
||||
|
||||
free(ref_seq); free(path);
|
||||
free(ref_seq);
|
||||
return cigar;
|
||||
}
|
||||
|
||||
|
|
|
|||
6
bwase.c
6
bwase.c
|
|
@ -4,7 +4,6 @@
|
|||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include "stdaln.h"
|
||||
#include "bwase.h"
|
||||
#include "bwtaln.h"
|
||||
#include "bntseq.h"
|
||||
|
|
@ -205,8 +204,8 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
|||
if (__cigar_op(cigar[*n_cigar-1]) == FROM_D) --(*n_cigar); // deletion at the 3'-end
|
||||
|
||||
// change "I" at either end of the read to S. just in case. This should rarely happen...
|
||||
if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(3, (__cigar_len(cigar[*n_cigar-1])));
|
||||
if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(3, (__cigar_len(cigar[0])));
|
||||
if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(FROM_S, (__cigar_len(cigar[*n_cigar-1])));
|
||||
if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(FROM_S, (__cigar_len(cigar[0])));
|
||||
|
||||
*_pos = (bwtint_t)__pos;
|
||||
free(ref_seq);
|
||||
|
|
@ -589,5 +588,6 @@ int bwa_sai2sam_se(int argc, char *argv[])
|
|||
return 0;
|
||||
}
|
||||
bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line);
|
||||
free(prefix);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
15
bwtaln.c
15
bwtaln.c
|
|
@ -312,18 +312,3 @@ int bwa_aln(int argc, char *argv[])
|
|||
free(opt); free(prefix);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* rgoya: Temporary clone of aln_path2cigar to accomodate for bwa_cigar_t,
|
||||
__cigar_op and __cigar_len while keeping stdaln stand alone */
|
||||
bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
|
||||
{
|
||||
uint32_t *cigar32;
|
||||
bwa_cigar_t *cigar;
|
||||
int i;
|
||||
cigar32 = aln_path2cigar32((path_t*) path, path_len, n_cigar);
|
||||
cigar = (bwa_cigar_t*)cigar32;
|
||||
for (i = 0; i < *n_cigar; ++i)
|
||||
cigar[i] = __cigar_create( (cigar32[i]&0xf), (cigar32[i]>>4) );
|
||||
return cigar;
|
||||
}
|
||||
|
||||
|
|
|
|||
12
bwtaln.h
12
bwtaln.h
|
|
@ -28,6 +28,11 @@
|
|||
#define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3)
|
||||
#endif
|
||||
|
||||
#define FROM_M 0
|
||||
#define FROM_I 1
|
||||
#define FROM_D 2
|
||||
#define FROM_S 3
|
||||
|
||||
typedef struct {
|
||||
bwtint_t w;
|
||||
int bid;
|
||||
|
|
@ -138,13 +143,6 @@ extern "C" {
|
|||
|
||||
void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac);
|
||||
|
||||
|
||||
/* rgoya: Temporary clone of aln_path2cigar to accomodate for bwa_cigar_t,
|
||||
__cigar_op and __cigar_len while keeping stdaln stand alone */
|
||||
#include "stdaln.h"
|
||||
|
||||
bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -4,7 +4,7 @@
|
|||
#include "utils.h"
|
||||
|
||||
#ifndef PACKAGE_VERSION
|
||||
#define PACKAGE_VERSION "0.7.0-r324-beta"
|
||||
#define PACKAGE_VERSION "0.7.0-r329-beta"
|
||||
#endif
|
||||
|
||||
static int usage()
|
||||
|
|
|
|||
162
stdaln.h
162
stdaln.h
|
|
@ -1,162 +0,0 @@
|
|||
/* The MIT License
|
||||
|
||||
Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
2009-07-23, 0.10.0
|
||||
|
||||
- Use 32-bit to store CIGAR
|
||||
|
||||
- Report suboptimal aligments
|
||||
|
||||
- Implemented half-fixed-half-open DP
|
||||
|
||||
2009-04-26, 0.9.10
|
||||
|
||||
- Allow to set a threshold for local alignment
|
||||
|
||||
2009-02-18, 0.9.9
|
||||
|
||||
- Fixed a bug when no residue matches
|
||||
|
||||
2008-08-04, 0.9.8
|
||||
|
||||
- Fixed the wrong declaration of aln_stdaln_aux()
|
||||
|
||||
- Avoid 0 coordinate for global alignment
|
||||
|
||||
2008-08-01, 0.9.7
|
||||
|
||||
- Change gap_end penalty to 5 in aln_param_bwa
|
||||
|
||||
- Add function to convert path_t to the CIGAR format
|
||||
|
||||
2008-08-01, 0.9.6
|
||||
|
||||
- The first gap now costs (gap_open+gap_ext), instead of
|
||||
gap_open. Scoring systems are modified accordingly.
|
||||
|
||||
- Gap end is now correctly handled. Previously it is not correct.
|
||||
|
||||
- Change license to MIT.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef LH3_STDALN_H_
|
||||
#define LH3_STDALN_H_
|
||||
|
||||
|
||||
#define STDALN_VERSION 0.11.0
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define FROM_M 0
|
||||
#define FROM_I 1
|
||||
#define FROM_D 2
|
||||
#define FROM_S 3
|
||||
|
||||
#define ALN_TYPE_LOCAL 0
|
||||
#define ALN_TYPE_GLOBAL 1
|
||||
#define ALN_TYPE_EXTEND 2
|
||||
|
||||
/* This is the smallest integer. It might be CPU-dependent in very RARE cases. */
|
||||
#define MINOR_INF -1073741823
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int gap_open;
|
||||
int gap_ext;
|
||||
int gap_end;
|
||||
|
||||
int *matrix;
|
||||
int row;
|
||||
int band_width;
|
||||
} AlnParam;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int i, j;
|
||||
unsigned char ctype;
|
||||
} path_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
path_t *path; /* for advanced users... :-) */
|
||||
int path_len; /* for advanced users... :-) */
|
||||
int start1, end1; /* start and end of the first sequence, coordinations are 1-based */
|
||||
int start2, end2; /* start and end of the second sequence, coordinations are 1-based */
|
||||
int score, subo; /* score */
|
||||
|
||||
char *out1, *out2; /* print them, and then you will know */
|
||||
char *outm;
|
||||
|
||||
int n_cigar;
|
||||
uint32_t *cigar32;
|
||||
} AlnAln;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
|
||||
int type, int do_align, int len1, int len2);
|
||||
AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int do_align);
|
||||
void aln_free_AlnAln(AlnAln *aa);
|
||||
|
||||
int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
||||
path_t *path, int *path_len);
|
||||
int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
||||
path_t *path, int *path_len, int _thres, int *_subo);
|
||||
int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
||||
path_t *path, int *path_len, int G0, uint8_t *_mem);
|
||||
uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
|
||||
uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/********************
|
||||
* global variables *
|
||||
********************/
|
||||
|
||||
extern AlnParam aln_param_bwa; /* = { 37, 9, 0, aln_sm_maq, 5, 50 }; */
|
||||
extern AlnParam aln_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */
|
||||
extern AlnParam aln_param_nt2nt; /* = { 10, 2, 2, aln_sm_nt, 16, 75 }; */
|
||||
extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
|
||||
extern AlnParam aln_param_rd2rd; /* = { 12, 2, 2, aln_sm_blosum62, 22, 50 }; */
|
||||
|
||||
/* common nucleotide score matrix for 16 bases */
|
||||
extern int aln_sm_nt[], aln_sm_bwa[];
|
||||
|
||||
/* BLOSUM62 and BLOSUM45 */
|
||||
extern int aln_sm_blosum62[], aln_sm_blosum45[];
|
||||
|
||||
/* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */
|
||||
extern int aln_sm_read[];
|
||||
|
||||
/* human-mouse score matrix for 4 bases */
|
||||
extern int aln_sm_hs[];
|
||||
|
||||
#endif
|
||||
Loading…
Reference in New Issue