From 46123639cff6247bb6bc351f29fed91dfc498af8 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 20 Oct 2011 12:09:35 -0400 Subject: [PATCH] removed reverse pac; bwa is not working right now --- bntseq.c | 3 +++ bwtindex.c | 45 --------------------------------------------- bwtmisc.c | 37 ------------------------------------- main.c | 2 -- main.h | 1 - 5 files changed, 3 insertions(+), 85 deletions(-) diff --git a/bntseq.c b/bntseq.c index b83b4e1..41fb68c 100644 --- a/bntseq.c +++ b/bntseq.c @@ -213,6 +213,7 @@ static void add1(const kseq_t *seq, bntseq_t *bns, FILE *fp, uint8_t *buf, int * int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix) { + extern void seq_reverse(int len, ubyte_t *seq, int is_comp); // in bwaseqio.c kseq_t *seq; char name[1024]; bntseq_t *bns; @@ -238,6 +239,8 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix) // read sequences while (kseq_read(seq) >= 0) { add1(seq, bns, fp, buf, &l_buf, &m_seqs, &m_holes, &q); + seq_reverse(seq->seq.l, (uint8_t*)seq->seq.s, 1); + add1(seq, bns, fp, buf, &l_buf, &m_seqs, &m_holes, &q); } xassert(bns->l_pac, "zero length sequence."); ret = bns->l_pac; diff --git a/bwtindex.c b/bwtindex.c index c752a2f..2dadea3 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -105,14 +105,6 @@ int bwa_index(int argc, char *argv[]) return 1; } if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT - { - strcpy(str, prefix); strcat(str, ".pac"); - strcpy(str2, prefix); strcat(str2, ".rpac"); - t = clock(); - fprintf(stderr, "[bwa_index] Reverse the packed sequence... "); - bwa_pac_rev_core(str, str2); - fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); - } { strcpy(str, prefix); strcat(str, ".pac"); strcpy(str2, prefix); strcat(str2, ".bwt"); @@ -127,20 +119,6 @@ int bwa_index(int argc, char *argv[]) } fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC); } - { - strcpy(str, prefix); strcat(str, ".rpac"); - strcpy(str2, prefix); strcat(str2, ".rbwt"); - t = clock(); - fprintf(stderr, "[bwa_index] Construct BWT for the reverse packed sequence...\n"); - if (algo_type == 2) bwt_bwtgen(str, str2); - else if (algo_type == 1 || algo_type == 3) { - bwt_t *bwt; - bwt = bwt_pac2bwt(str, algo_type == 3); - bwt_dump_bwt(str2, bwt); - bwt_destroy(bwt); - } - fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC); - } { bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); @@ -152,17 +130,6 @@ int bwa_index(int argc, char *argv[]) bwt_destroy(bwt); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } - { - bwt_t *bwt; - strcpy(str, prefix); strcat(str, ".rbwt"); - t = clock(); - fprintf(stderr, "[bwa_index] Update reverse BWT... "); - bwt = bwt_restore_bwt(str); - bwt_bwtupdate_core(bwt); - bwt_dump_bwt(str, bwt); - bwt_destroy(bwt); - fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); - } { bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); @@ -175,18 +142,6 @@ int bwa_index(int argc, char *argv[]) bwt_destroy(bwt); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } - { - bwt_t *bwt; - strcpy(str, prefix); strcat(str, ".rbwt"); - strcpy(str3, prefix); strcat(str3, ".rsa"); - t = clock(); - fprintf(stderr, "[bwa_index] Construct SA from reverse BWT and Occ... "); - bwt = bwt_restore_bwt(str); - bwt_cal_sa(bwt, 32); - bwt_dump_sa(str3, bwt); - bwt_destroy(bwt); - fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); - } free(str3); free(str2); free(str); free(prefix); return 0; } diff --git a/bwtmisc.c b/bwtmisc.c index 8d20287..c35d684 100644 --- a/bwtmisc.c +++ b/bwtmisc.c @@ -157,43 +157,6 @@ int bwa_bwtupdate(int argc, char *argv[]) return 0; } -void bwa_pac_rev_core(const char *fn, const char *fn_rev) -{ - int64_t seq_len, i; - bwtint_t pac_len, j; - ubyte_t *bufin, *bufout, ct; - FILE *fp; - seq_len = bwa_seq_len(fn); - pac_len = (seq_len >> 2) + 1; - bufin = (ubyte_t*)calloc(pac_len, 1); - bufout = (ubyte_t*)calloc(pac_len, 1); - fp = xopen(fn, "rb"); - fread(bufin, 1, pac_len, fp); - fclose(fp); - for (i = seq_len - 1, j = 0; i >= 0; --i) { - int c = bufin[i>>2] >> ((~i&3)<<1) & 3; - bwtint_t j = seq_len - 1 - i; - bufout[j>>2] |= c << ((~j&3)<<1); - } - free(bufin); - fp = xopen(fn_rev, "wb"); - fwrite(bufout, 1, pac_len, fp); - ct = seq_len % 4; - fwrite(&ct, 1, 1, fp); - fclose(fp); - free(bufout); -} - -int bwa_pac_rev(int argc, char *argv[]) -{ - if (argc < 3) { - fprintf(stderr, "Usage: bwa pac_rev \n"); - return 1; - } - bwa_pac_rev_core(argv[1], argv[2]); - return 0; -} - const int nst_color_space_table[] = { 4, 0, 0, 1, 0, 2, 3, 4, 0, 3, 2, 4, 1, 4, 4, 4}; /* this function is not memory efficient, but this will make life easier diff --git a/main.c b/main.c index f3447e6..0e5ee8a 100644 --- a/main.c +++ b/main.c @@ -24,7 +24,6 @@ static int usage() fprintf(stderr, " pac2bwt generate BWT from PAC\n"); fprintf(stderr, " pac2bwtgen alternative algorithm for generating BWT\n"); fprintf(stderr, " bwtupdate update .bwt to the new format\n"); - fprintf(stderr, " pac_rev generate reverse PAC\n"); fprintf(stderr, " bwt2sa generate SA from BWT and Occ\n"); fprintf(stderr, " pac2cspac convert PAC to color-space PAC\n"); fprintf(stderr, " stdsw standard SW/NW alignment\n"); @@ -44,7 +43,6 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "pac2bwt") == 0) return bwa_pac2bwt(argc-1, argv+1); else if (strcmp(argv[1], "pac2bwtgen") == 0) return bwt_bwtgen_main(argc-1, argv+1); else if (strcmp(argv[1], "bwtupdate") == 0) return bwa_bwtupdate(argc-1, argv+1); - else if (strcmp(argv[1], "pac_rev") == 0) return bwa_pac_rev(argc-1, argv+1); else if (strcmp(argv[1], "bwt2sa") == 0) return bwa_bwt2sa(argc-1, argv+1); else if (strcmp(argv[1], "index") == 0) return bwa_index(argc-1, argv+1); else if (strcmp(argv[1], "aln") == 0) return bwa_aln(argc-1, argv+1); diff --git a/main.h b/main.h index 5e7697a..15ec189 100644 --- a/main.h +++ b/main.h @@ -6,7 +6,6 @@ extern "C" { #endif int bwa_fa2pac(int argc, char *argv[]); - int bwa_pac_rev(int argc, char *argv[]); int bwa_pac2cspac(int argc, char *argv[]); int bwa_pac2bwt(int argc, char *argv[]); int bwa_bwtupdate(int argc, char *argv[]);