concatenate for-rev sequences in the end
This commit is contained in:
parent
ca809a44d9
commit
aabb807734
56
bntseq.c
56
bntseq.c
|
|
@ -163,6 +163,9 @@ void bns_destroy(bntseq_t *bns)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define _set_pac(pac, l, c) ((pac)[(l)>>2] |= (c)<<((~(l)&3)<<1))
|
||||||
|
#define _get_pac(pac, l) ((pac)[(l)>>2]>>((~(l)&3)<<1)&3)
|
||||||
|
|
||||||
static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_pac, int *m_seqs, int *m_holes, bntamb1_t **q)
|
static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_pac, int *m_seqs, int *m_holes, bntamb1_t **q)
|
||||||
{
|
{
|
||||||
bntann1_t *p;
|
bntann1_t *p;
|
||||||
|
|
@ -178,7 +181,7 @@ static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_
|
||||||
p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len;
|
p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len;
|
||||||
p->n_ambs = 0;
|
p->n_ambs = 0;
|
||||||
for (i = lasts = 0; i < seq->seq.l; ++i) {
|
for (i = lasts = 0; i < seq->seq.l; ++i) {
|
||||||
int c = seq->seq.s[i];
|
int c = nst_nt4_table[(int)seq->seq.s[i]];
|
||||||
if (c >= 4) { // N
|
if (c >= 4) { // N
|
||||||
if (lasts == seq->seq.s[i]) { // contiguous N
|
if (lasts == seq->seq.s[i]) { // contiguous N
|
||||||
++(*q)->len;
|
++(*q)->len;
|
||||||
|
|
@ -190,20 +193,20 @@ static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_
|
||||||
*q = bns->ambs + bns->n_holes;
|
*q = bns->ambs + bns->n_holes;
|
||||||
(*q)->len = 1;
|
(*q)->len = 1;
|
||||||
(*q)->offset = p->offset + i;
|
(*q)->offset = p->offset + i;
|
||||||
(*q)->amb = 'N';
|
(*q)->amb = seq->seq.s[i];
|
||||||
++p->n_ambs;
|
++p->n_ambs;
|
||||||
++bns->n_holes;
|
++bns->n_holes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lasts = seq->seq.s[i];
|
lasts = seq->seq.s[i];
|
||||||
{ // fill buffer
|
{ // fill buffer
|
||||||
if (c >= 4) c = c>>4;
|
if (c >= 4) c = lrand48()&3;
|
||||||
if (bns->l_pac == *m_pac) { // double the pac size
|
if (bns->l_pac == *m_pac) { // double the pac size
|
||||||
*m_pac <<= 1;
|
*m_pac <<= 1;
|
||||||
pac = realloc(pac, *m_pac/4);
|
pac = realloc(pac, *m_pac/4);
|
||||||
memset(pac + bns->l_pac/4, 0, (*m_pac - bns->l_pac)/4);
|
memset(pac + bns->l_pac/4, 0, (*m_pac - bns->l_pac)/4);
|
||||||
}
|
}
|
||||||
pac[bns->l_pac>>2] |= c << ((3 - (bns->l_pac&3)) << 1);
|
_set_pac(pac, bns->l_pac, c);
|
||||||
++bns->l_pac;
|
++bns->l_pac;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -218,8 +221,8 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
|
||||||
char name[1024];
|
char name[1024];
|
||||||
bntseq_t *bns;
|
bntseq_t *bns;
|
||||||
uint8_t *pac = 0;
|
uint8_t *pac = 0;
|
||||||
int32_t i, m_seqs, m_holes;
|
int32_t m_seqs, m_holes;
|
||||||
int64_t ret = -1, m_pac;
|
int64_t ret = -1, m_pac, l;
|
||||||
bntamb1_t *q;
|
bntamb1_t *q;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
|
|
||||||
|
|
@ -236,19 +239,13 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
|
||||||
strcpy(name, prefix); strcat(name, ".pac");
|
strcpy(name, prefix); strcat(name, ".pac");
|
||||||
fp = xopen(name, "wb");
|
fp = xopen(name, "wb");
|
||||||
// read sequences
|
// read sequences
|
||||||
while (kseq_read(seq) >= 0) {
|
while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
|
||||||
for (i = 0; i < seq->seq.l; ++i) { // convert to 2-bit encoding
|
if (!for_only) { // add the reverse complemented sequence
|
||||||
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
|
||||||
if (seq->seq.s[i] > 3)
|
pac = realloc(pac, m_pac/4);
|
||||||
seq->seq.s[i] |= (lrand48()&3) << 4;
|
memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
|
||||||
}
|
for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
|
||||||
pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
|
_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
|
||||||
if (!for_only) {
|
|
||||||
seq_reverse(seq->seq.l, (uint8_t*)seq->seq.s, 0); // reversed but not complemented
|
|
||||||
for (i = 0; i < seq->seq.l; ++i) // complement
|
|
||||||
seq->seq.s[i] = seq->seq.s[i] < 4? 3 - seq->seq.s[i] : ((3 - (seq->seq.s[i]>>4)) << 4 | 4);
|
|
||||||
pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ret = bns->l_pac;
|
ret = bns->l_pac;
|
||||||
{ // finalize .pac file
|
{ // finalize .pac file
|
||||||
|
|
@ -290,32 +287,21 @@ int bwa_fa2pac(int argc, char *argv[])
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t bns_pos2refId(const bntseq_t *bns, int64_t pos, int is_fr, int *ref_id, int *is_rev)
|
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id)
|
||||||
{
|
{
|
||||||
int left, mid, right;
|
int left, mid, right, nn;
|
||||||
is_fr = is_fr? 1 : 0;
|
if (ref_id) {
|
||||||
left = 0; mid = 0; right = bns->n_seqs;
|
left = 0; mid = 0; right = bns->n_seqs;
|
||||||
while (left < right) {
|
while (left < right) {
|
||||||
mid = (left + right) >> 1;
|
mid = (left + right) >> 1;
|
||||||
if (pos >= bns->anns[mid].offset<<is_fr) {
|
if (pos_f >= bns->anns[mid].offset) {
|
||||||
if (mid == bns->n_seqs - 1) break;
|
if (mid == bns->n_seqs - 1) break;
|
||||||
if (pos < bns->anns[mid+1].offset<<is_fr) break; // bracketed
|
if (pos_f < bns->anns[mid+1].offset) break; // bracketed
|
||||||
left = mid + 1;
|
left = mid + 1;
|
||||||
} else right = mid;
|
} else right = mid;
|
||||||
}
|
}
|
||||||
*ref_id = mid;
|
*ref_id = mid;
|
||||||
if (is_fr) { // then compute the forward-only position
|
|
||||||
bntann1_t *p = &bns->anns[mid];
|
|
||||||
if (pos - (p->offset<<1) < p->len) *is_rev = 0, pos -= p->offset;
|
|
||||||
else *is_rev = 1, pos = p->len - (pos - (p->offset<<1) - p->len) + p->offset;
|
|
||||||
}
|
}
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id)
|
|
||||||
{
|
|
||||||
int left, mid, right, nn;
|
|
||||||
if (ref_id) bns_pos2refId(bns, pos_f, 0, ref_id, 0);
|
|
||||||
left = 0; right = bns->n_holes; nn = 0;
|
left = 0; right = bns->n_holes; nn = 0;
|
||||||
while (left < right) {
|
while (left < right) {
|
||||||
mid = (left + right) >> 1;
|
mid = (left + right) >> 1;
|
||||||
|
|
|
||||||
7
bntseq.h
7
bntseq.h
|
|
@ -71,12 +71,15 @@ extern "C" {
|
||||||
bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename);
|
bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename);
|
||||||
void bns_destroy(bntseq_t *bns);
|
void bns_destroy(bntseq_t *bns);
|
||||||
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only);
|
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only);
|
||||||
int64_t bns_pos2refId(const bntseq_t *bns, int64_t pos, int is_fr, int *ref_id, int *is_rev);
|
|
||||||
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id);
|
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static inline int64_t bns_depos(const bntseq_t *bns, int64_t pos, int *is_rev)
|
||||||
|
{
|
||||||
|
return (*is_rev = (pos >= bns->l_pac))? (bns->l_pac<<1) - pos : pos;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
7
bwase.c
7
bwase.c
|
|
@ -111,10 +111,9 @@ int bwa_approx_mapQ(const bwa_seq_t *p, int mm)
|
||||||
|
|
||||||
bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand)
|
bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand)
|
||||||
{
|
{
|
||||||
bwtint_t pos_fr, pos_f;
|
bwtint_t pos_f;
|
||||||
int is_rev, ref_id;
|
int is_rev;
|
||||||
pos_fr = bwt_sa(bwt, sapos);
|
pos_f = bns_depos(bns, bwt_sa(bwt, sapos), &is_rev); // pos_f
|
||||||
pos_f = bns_pos2refId(bns, pos_fr, 1, &ref_id, &is_rev); // pos_f
|
|
||||||
*strand = !is_rev;
|
*strand = !is_rev;
|
||||||
/* NB: For gapped alignment, pacpos may not be correct, which will be fixed
|
/* NB: For gapped alignment, pacpos may not be correct, which will be fixed
|
||||||
* in refine_gapped_core(). This line also determines the way "x" is
|
* in refine_gapped_core(). This line also determines the way "x" is
|
||||||
|
|
|
||||||
|
|
@ -268,7 +268,7 @@ static void save_narrow_hits(const bwtl_t *bwtl, bsw2entry_t *u, bwtsw2_t *b1, i
|
||||||
* will be obtained and stored in b->hits[*].k. */
|
* will be obtained and stored in b->hits[*].k. */
|
||||||
int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS)
|
int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS)
|
||||||
{
|
{
|
||||||
int i, j, n, ref_id, is_rev;
|
int i, j, n, is_rev;
|
||||||
if (b->n == 0) return 0;
|
if (b->n == 0) return 0;
|
||||||
if (bwt && bns) { // convert to chromosomal coordinates if requested
|
if (bwt && bns) { // convert to chromosomal coordinates if requested
|
||||||
int old_n = b->n;
|
int old_n = b->n;
|
||||||
|
|
@ -287,7 +287,7 @@ int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int
|
||||||
if (p->G == 0 && p->k == 0 && p->l == 0 && p->len == 0) continue;
|
if (p->G == 0 && p->k == 0 && p->l == 0 && p->len == 0) continue;
|
||||||
for (k = p->k; k <= p->l; ++k) {
|
for (k = p->k; k <= p->l; ++k) {
|
||||||
b->hits[j] = *p;
|
b->hits[j] = *p;
|
||||||
b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, k), 1, &ref_id, &is_rev);
|
b->hits[j].k = bns_depos(bns, bwt_sa(bwt, k), &is_rev);
|
||||||
b->hits[j].l = 0;
|
b->hits[j].l = 0;
|
||||||
b->hits[j].is_rev = is_rev;
|
b->hits[j].is_rev = is_rev;
|
||||||
if (is_rev) b->hits[j].k -= p->len;
|
if (is_rev) b->hits[j].k -= p->len;
|
||||||
|
|
@ -295,7 +295,7 @@ int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int
|
||||||
}
|
}
|
||||||
} else if (p->G > 0) {
|
} else if (p->G > 0) {
|
||||||
b->hits[j] = *p;
|
b->hits[j] = *p;
|
||||||
b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, p->k), 1, &ref_id, &is_rev);
|
b->hits[j].k = bns_depos(bns, bwt_sa(bwt, p->k), &is_rev);
|
||||||
b->hits[j].l = 0;
|
b->hits[j].l = 0;
|
||||||
b->hits[j].flag |= 1;
|
b->hits[j].flag |= 1;
|
||||||
b->hits[j].is_rev = is_rev;
|
b->hits[j].is_rev = is_rev;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue