backup the current debugging code; more changes

This commit is contained in:
Heng Li 2011-10-25 10:03:57 -04:00
parent 22c2252e15
commit 7626595e3a
3 changed files with 33 additions and 6 deletions

View File

@ -190,14 +190,14 @@ static void add1(const kseq_t *seq, bntseq_t *bns, FILE *fp, uint8_t *buf, int *
*q = bns->ambs + bns->n_holes; *q = bns->ambs + bns->n_holes;
(*q)->len = 1; (*q)->len = 1;
(*q)->offset = p->offset + i; (*q)->offset = p->offset + i;
(*q)->amb = seq->seq.s[i]; (*q)->amb = 'N';
++p->n_ambs; ++p->n_ambs;
++bns->n_holes; ++bns->n_holes;
} }
} }
lasts = seq->seq.s[i]; lasts = seq->seq.s[i];
{ // fill buffer { // fill buffer
if (c >= 4) c = lrand48()&0x3; if (c >= 4) c = c>>4;
if (*l_buf == 0x40000) { if (*l_buf == 0x40000) {
fwrite(buf, 1, 0x10000, fp); fwrite(buf, 1, 0x10000, fp);
memset(buf, 0, 0x10000); memset(buf, 0, 0x10000);
@ -238,11 +238,16 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
memset(buf, 0, 0x10000); memset(buf, 0, 0x10000);
// read sequences // read sequences
while (kseq_read(seq) >= 0) { while (kseq_read(seq) >= 0) {
for (i = 0; i < seq->seq.l; ++i) // convert to 2-bit encoding for (i = 0; i < seq->seq.l; ++i) { // convert to 2-bit encoding
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
if (seq->seq.s[i] > 3)
seq->seq.s[i] |= (lrand48()&3) << 4;
}
add1(seq, bns, fp, buf, &l_buf, &m_seqs, &m_holes, &q); add1(seq, bns, fp, buf, &l_buf, &m_seqs, &m_holes, &q);
if (!for_only) { if (!for_only) {
seq_reverse(seq->seq.l, (uint8_t*)seq->seq.s, 1); seq_reverse(seq->seq.l, (uint8_t*)seq->seq.s, 0); // reversed but not complemented
for (i = 0; i < seq->seq.l; ++i) // complement
seq->seq.s[i] = seq->seq.s[i] < 4? 3 - seq->seq.s[i] : ((3 - (seq->seq.s[i]>>4)) << 4 | 4);
add1(seq, bns, fp, buf, &l_buf, &m_seqs, &m_holes, &q); add1(seq, bns, fp, buf, &l_buf, &m_seqs, &m_holes, &q);
} }
} }

5
bwt.c
View File

@ -282,9 +282,9 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem
prev = tmpvec[0]? tmpvec[0] : &a[0]; prev = tmpvec[0]? tmpvec[0] : &a[0];
curr = tmpvec[1]? tmpvec[1] : &a[1]; curr = tmpvec[1]? tmpvec[1] : &a[1];
bwt_set_intv(bwt, q[x], ik); bwt_set_intv(bwt, q[x], ik);
ik.info = x + 1; ik.info = x + 1;
for (i = x + 1; i < len; ++i) { // forward search
for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
if (q[i] > 3) break; if (q[i] > 3) break;
c = 3 - q[i]; c = 3 - q[i];
bwt_extend(bwt, &ik, ok, 0); bwt_extend(bwt, &ik, ok, 0);
@ -298,6 +298,7 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem
ret = curr->a[0].info; // this will be the returned value ret = curr->a[0].info; // this will be the returned value
swap = curr; curr = prev; prev = swap; swap = curr; curr = prev; prev = swap;
if (x == 40) printf("[%lld,%lld,%lld]\n", prev->a[0].x[0], prev->a[0].x[1], prev->a[0].x[2]);
for (i = x - 1; i >= -1; --i) { // backward search for MEMs for (i = x - 1; i >= -1; --i) { // backward search for MEMs
if (q[i] > 3) break; if (q[i] > 3) break;
c = i < 0? 0 : q[i]; c = i < 0? 0 : q[i];

View File

@ -39,6 +39,27 @@ int main_fastmap(int argc, char *argv[])
while (kseq_read(seq) >= 0) { while (kseq_read(seq) >= 0) {
for (i = 0; i < seq->seq.l; ++i) for (i = 0; i < seq->seq.l; ++i)
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
{
int beg = 98;
bwtintv_t ik, ok[4];
bwt_set_intv(bwt, seq->seq.s[seq->seq.l - 1], ik);
for (i = seq->seq.l - 2; i >= beg; --i) {
//printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i+1);
bwt_extend(bwt, &ik, ok, 1);
ik = ok[seq->seq.s[i]];
if (ik.x[2] == 0) break;
}
printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i+1);
printf("======================== %lld, [%lld,%lld,%lld,%lld]\n", bwt->primary, bwt->L2[1], bwt->L2[2]-bwt->L2[1], bwt->L2[3]-bwt->L2[2], bwt->L2[4]-bwt->L2[3]);
bwt_set_intv(bwt, seq->seq.s[beg], ik);
for (i = beg + 1; i < seq->seq.l; ++i) {
//printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i-1);
bwt_extend(bwt, &ik, ok, 0);
ik = ok[3-seq->seq.s[i]];
if (ik.x[2] == 0) break;
}
printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i-1);
}
bwt_smem(bwt, seq->seq.l, (uint8_t*)seq->seq.s, &mem, tvec); bwt_smem(bwt, seq->seq.l, (uint8_t*)seq->seq.s, &mem, tvec);
printf(">%s\t%ld\n", seq->name.s, mem.n); printf(">%s\t%ld\n", seq->name.s, mem.n);
for (i = 0; i < mem.n; ++i) { for (i = 0; i < mem.n; ++i) {