From 10721ca60250ed34ed059c073a4b51180a6f44e8 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 15 Jan 2011 14:07:08 -0500 Subject: [PATCH] Added an option to accept Illumina 1.3+ fastq --- bwape.c | 8 +++++--- bwase.c | 6 ++++-- bwaseqio.c | 6 ++++-- bwtaln.c | 10 +++++++--- bwtaln.h | 1 + main.c | 2 +- 6 files changed, 22 insertions(+), 11 deletions(-) diff --git a/bwape.c b/bwape.c index 65de3e2..dec52c3 100644 --- a/bwape.c +++ b/bwape.c @@ -645,7 +645,7 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt) { extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa); - int i, j, n_seqs, tot_seqs = 0; + int i, j, n_seqs, tot_seqs = 0, read_flag = 0; bwa_seq_t *seqs[2]; bwa_seqio_t *ks[2]; clock_t t; @@ -691,12 +691,14 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f // core loop bwa_print_sam_SQ(bns); bwa_print_sam_PG(); - while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) { + read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0; + read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1; + while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) { int cnt_chg; isize_info_t ii; ubyte_t *pacseq; - seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual); + seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, read_flag, opt.trim_qual); tot_seqs += n_seqs; t = clock(); diff --git a/bwase.c b/bwase.c index 2dd2a35..12f01fb 100644 --- a/bwase.c +++ b/bwase.c @@ -587,7 +587,7 @@ int bwa_set_rg(const char *s) void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ) { extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa); - int i, n_seqs, tot_seqs = 0, m_aln; + int i, n_seqs, tot_seqs = 0, m_aln, read_flag = 0; bwt_aln1_t *aln = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; @@ -611,7 +611,9 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f // set ks ks = bwa_open_reads(opt.mode, fn_fa); // core loop - while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) { + read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0; + read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1; + while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) { tot_seqs += n_seqs; t = clock(); diff --git a/bwaseqio.c b/bwaseqio.c index 07a3082..10ff83b 100644 --- a/bwaseqio.c +++ b/bwaseqio.c @@ -139,17 +139,19 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com return seqs; } - bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual) +bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int flag, int trim_qual) { bwa_seq_t *seqs, *p; kseq_t *seq = bs->ks; - int n_seqs, l, i; + int n_seqs, l, i, is_comp = flag&1, is_64 = flag&2; long n_trimmed = 0, n_tot = 0; if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); n_seqs = 0; seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); while ((l = kseq_read(seq)) >= 0) { + if (is_64 && seq->qual.l) + for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31; p = &seqs[n_seqs++]; p->tid = -1; // no assigned to a thread p->qual = 0; diff --git a/bwtaln.c b/bwtaln.c index de63676..3aa9a5c 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -172,7 +172,7 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa) void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) { - int i, n_seqs, tot_seqs = 0; + int i, n_seqs, tot_seqs = 0, read_flag = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; clock_t t; @@ -190,7 +190,9 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) // core loop fwrite(opt, sizeof(gap_opt_t), 1, stdout); - while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode & BWA_MODE_COMPREAD, opt->trim_qual)) != 0) { + read_flag |= (opt->mode & BWA_MODE_COMPREAD)? 1 : 0; + read_flag |= ((opt->mode & BWA_MODE_IL13)? 1 : 0)<<1; + while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt->trim_qual)) != 0) { tot_seqs += n_seqs; t = clock(); @@ -246,7 +248,7 @@ int bwa_aln(int argc, char *argv[]) gap_opt_t *opt; opt = gap_init_opt(); - while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012")) >= 0) { + while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012I")) >= 0) { switch (c) { case 'n': if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1; @@ -273,6 +275,7 @@ int bwa_aln(int argc, char *argv[]) case '0': opt->mode |= BWA_MODE_BAM_SE; break; case '1': opt->mode |= BWA_MODE_BAM_READ1; break; case '2': opt->mode |= BWA_MODE_BAM_READ2; break; + case 'I': opt->mode |= BWA_MODE_IL13; break; default: return 1; } } @@ -303,6 +306,7 @@ int bwa_aln(int argc, char *argv[]) fprintf(stderr, " -c input sequences are in the color space\n"); fprintf(stderr, " -L log-scaled gap penalty for long deletions\n"); fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n"); + fprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\n"); fprintf(stderr, " -b the input read file is in the BAM format\n"); fprintf(stderr, " -0 use single-end reads only (effective with -b)\n"); fprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\n"); diff --git a/bwtaln.h b/bwtaln.h index 0331b56..9e841f8 100644 --- a/bwtaln.h +++ b/bwtaln.h @@ -87,6 +87,7 @@ typedef struct { #define BWA_MODE_BAM_SE 0x40 #define BWA_MODE_BAM_READ1 0x80 #define BWA_MODE_BAM_READ2 0x100 +#define BWA_MODE_IL13 0x200 typedef struct { int s_mm, s_gapo, s_gape; diff --git a/main.c b/main.c index eff0d68..25e32e8 100644 --- a/main.c +++ b/main.c @@ -3,7 +3,7 @@ #include "main.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.5.9rc1-9" +#define PACKAGE_VERSION "0.5.9rc1-r10" #endif static int usage()