Added an option to accept Illumina 1.3+ fastq

This commit is contained in:
Heng Li 2011-01-15 14:07:08 -05:00
parent f335b33624
commit 10721ca602
6 changed files with 22 additions and 11 deletions

View File

@ -645,7 +645,7 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt)
{
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
int i, j, n_seqs, tot_seqs = 0;
int i, j, n_seqs, tot_seqs = 0, read_flag = 0;
bwa_seq_t *seqs[2];
bwa_seqio_t *ks[2];
clock_t t;
@ -691,12 +691,14 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
// core loop
bwa_print_sam_SQ(bns);
bwa_print_sam_PG();
while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) {
read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0;
read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1;
while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) {
int cnt_chg;
isize_info_t ii;
ubyte_t *pacseq;
seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual);
seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, read_flag, opt.trim_qual);
tot_seqs += n_seqs;
t = clock();

View File

@ -587,7 +587,7 @@ int bwa_set_rg(const char *s)
void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ)
{
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
int i, n_seqs, tot_seqs = 0, m_aln;
int i, n_seqs, tot_seqs = 0, m_aln, read_flag = 0;
bwt_aln1_t *aln = 0;
bwa_seq_t *seqs;
bwa_seqio_t *ks;
@ -611,7 +611,9 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
// set ks
ks = bwa_open_reads(opt.mode, fn_fa);
// core loop
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) {
read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0;
read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1;
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) {
tot_seqs += n_seqs;
t = clock();

View File

@ -139,17 +139,19 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
return seqs;
}
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int flag, int trim_qual)
{
bwa_seq_t *seqs, *p;
kseq_t *seq = bs->ks;
int n_seqs, l, i;
int n_seqs, l, i, is_comp = flag&1, is_64 = flag&2;
long n_trimmed = 0, n_tot = 0;
if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual);
n_seqs = 0;
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
while ((l = kseq_read(seq)) >= 0) {
if (is_64 && seq->qual.l)
for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
p = &seqs[n_seqs++];
p->tid = -1; // no assigned to a thread
p->qual = 0;

View File

@ -172,7 +172,7 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa)
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
int i, n_seqs, tot_seqs = 0;
int i, n_seqs, tot_seqs = 0, read_flag = 0;
bwa_seq_t *seqs;
bwa_seqio_t *ks;
clock_t t;
@ -190,7 +190,9 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
// core loop
fwrite(opt, sizeof(gap_opt_t), 1, stdout);
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode & BWA_MODE_COMPREAD, opt->trim_qual)) != 0) {
read_flag |= (opt->mode & BWA_MODE_COMPREAD)? 1 : 0;
read_flag |= ((opt->mode & BWA_MODE_IL13)? 1 : 0)<<1;
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt->trim_qual)) != 0) {
tot_seqs += n_seqs;
t = clock();
@ -246,7 +248,7 @@ int bwa_aln(int argc, char *argv[])
gap_opt_t *opt;
opt = gap_init_opt();
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012")) >= 0) {
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012I")) >= 0) {
switch (c) {
case 'n':
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
@ -273,6 +275,7 @@ int bwa_aln(int argc, char *argv[])
case '0': opt->mode |= BWA_MODE_BAM_SE; break;
case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
case 'I': opt->mode |= BWA_MODE_IL13; break;
default: return 1;
}
}
@ -303,6 +306,7 @@ int bwa_aln(int argc, char *argv[])
fprintf(stderr, " -c input sequences are in the color space\n");
fprintf(stderr, " -L log-scaled gap penalty for long deletions\n");
fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n");
fprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\n");
fprintf(stderr, " -b the input read file is in the BAM format\n");
fprintf(stderr, " -0 use single-end reads only (effective with -b)\n");
fprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\n");

View File

@ -87,6 +87,7 @@ typedef struct {
#define BWA_MODE_BAM_SE 0x40
#define BWA_MODE_BAM_READ1 0x80
#define BWA_MODE_BAM_READ2 0x100
#define BWA_MODE_IL13 0x200
typedef struct {
int s_mm, s_gapo, s_gape;

2
main.c
View File

@ -3,7 +3,7 @@
#include "main.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.5.9rc1-9"
#define PACKAGE_VERSION "0.5.9rc1-r10"
#endif
static int usage()