Added barcode support
This commit is contained in:
parent
10721ca602
commit
51d354cd28
|
|
@ -1,3 +1,4 @@
|
|||
*.[oa]
|
||||
bwa
|
||||
test
|
||||
.*.swp
|
||||
|
|
|
|||
11
bwape.c
11
bwape.c
|
|
@ -645,13 +645,13 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
|
|||
void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt)
|
||||
{
|
||||
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
|
||||
int i, j, n_seqs, tot_seqs = 0, read_flag = 0;
|
||||
int i, j, n_seqs, tot_seqs = 0;
|
||||
bwa_seq_t *seqs[2];
|
||||
bwa_seqio_t *ks[2];
|
||||
clock_t t;
|
||||
bntseq_t *bns, *ntbns = 0;
|
||||
FILE *fp_sa[2];
|
||||
gap_opt_t opt;
|
||||
gap_opt_t opt, opt0;
|
||||
khint_t iter;
|
||||
isize_info_t last_ii; // this is for the last batch of reads
|
||||
char str[1024];
|
||||
|
|
@ -671,6 +671,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
|
|||
|
||||
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
|
||||
ks[0] = bwa_open_reads(opt.mode, fn_fa[0]);
|
||||
opt0 = opt;
|
||||
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten!
|
||||
ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
|
||||
if (!(opt.mode & BWA_MODE_COMPREAD)) {
|
||||
|
|
@ -691,14 +692,12 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
|
|||
// core loop
|
||||
bwa_print_sam_SQ(bns);
|
||||
bwa_print_sam_PG();
|
||||
read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0;
|
||||
read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1;
|
||||
while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) {
|
||||
while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, opt0.mode, opt0.trim_qual)) != 0) {
|
||||
int cnt_chg;
|
||||
isize_info_t ii;
|
||||
ubyte_t *pacseq;
|
||||
|
||||
seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, read_flag, opt.trim_qual);
|
||||
seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, opt.mode, opt.trim_qual);
|
||||
tot_seqs += n_seqs;
|
||||
t = clock();
|
||||
|
||||
|
|
|
|||
8
bwase.c
8
bwase.c
|
|
@ -472,6 +472,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
|||
} else printf("*");
|
||||
|
||||
if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id);
|
||||
if (p->bc[0]) printf("\tBC:Z:%s", p->bc);
|
||||
if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
|
||||
if (p->type != BWA_TYPE_NO_MATCH) {
|
||||
int i;
|
||||
|
|
@ -519,6 +520,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
|||
printf("%s", p->qual);
|
||||
} else printf("*");
|
||||
if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id);
|
||||
if (p->bc[0]) printf("\tBC:Z:%s", p->bc);
|
||||
if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
|
||||
putchar('\n');
|
||||
}
|
||||
|
|
@ -587,7 +589,7 @@ int bwa_set_rg(const char *s)
|
|||
void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ)
|
||||
{
|
||||
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
|
||||
int i, n_seqs, tot_seqs = 0, m_aln, read_flag = 0;
|
||||
int i, n_seqs, tot_seqs = 0, m_aln;
|
||||
bwt_aln1_t *aln = 0;
|
||||
bwa_seq_t *seqs;
|
||||
bwa_seqio_t *ks;
|
||||
|
|
@ -611,9 +613,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
|
|||
// set ks
|
||||
ks = bwa_open_reads(opt.mode, fn_fa);
|
||||
// core loop
|
||||
read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0;
|
||||
read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1;
|
||||
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) {
|
||||
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode, opt.trim_qual)) != 0) {
|
||||
tot_seqs += n_seqs;
|
||||
t = clock();
|
||||
|
||||
|
|
|
|||
28
bwaseqio.c
28
bwaseqio.c
|
|
@ -1,4 +1,5 @@
|
|||
#include <zlib.h>
|
||||
#include <ctype.h>
|
||||
#include "bwtaln.h"
|
||||
#include "utils.h"
|
||||
#include "bamlite.h"
|
||||
|
|
@ -139,20 +140,41 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
|
|||
return seqs;
|
||||
}
|
||||
|
||||
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int flag, int trim_qual)
|
||||
#define BARCODE_LOW_QUAL 13
|
||||
|
||||
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int trim_qual)
|
||||
{
|
||||
bwa_seq_t *seqs, *p;
|
||||
kseq_t *seq = bs->ks;
|
||||
int n_seqs, l, i, is_comp = flag&1, is_64 = flag&2;
|
||||
int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24;
|
||||
long n_trimmed = 0, n_tot = 0;
|
||||
|
||||
if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual);
|
||||
if (l_bc > 15) {
|
||||
fprintf(stderr, "[%s] the maximum barcode length is 15.\n", __func__);
|
||||
return 0;
|
||||
}
|
||||
if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input
|
||||
n_seqs = 0;
|
||||
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
|
||||
while ((l = kseq_read(seq)) >= 0) {
|
||||
if (is_64 && seq->qual.l)
|
||||
for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
|
||||
if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
|
||||
p = &seqs[n_seqs++];
|
||||
if (l_bc) { // then trim barcode
|
||||
for (i = 0; i < l_bc; ++i)
|
||||
p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower(seq->seq.s[i]) : toupper(seq->seq.s[i]);
|
||||
p->bc[i] = 0;
|
||||
for (; i < seq->seq.l; ++i)
|
||||
seq->seq.s[i - l_bc] = seq->seq.s[i];
|
||||
seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0;
|
||||
if (seq->qual.l) {
|
||||
for (i = l_bc; i < seq->qual.l; ++i)
|
||||
seq->qual.s[i - l_bc] = seq->qual.s[i];
|
||||
seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0;
|
||||
}
|
||||
l = seq->seq.l;
|
||||
} else p->bc[0] = 0;
|
||||
p->tid = -1; // no assigned to a thread
|
||||
p->qual = 0;
|
||||
p->full_len = p->clip_len = p->len = l;
|
||||
|
|
|
|||
10
bwtaln.c
10
bwtaln.c
|
|
@ -172,7 +172,7 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa)
|
|||
|
||||
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
|
||||
{
|
||||
int i, n_seqs, tot_seqs = 0, read_flag = 0;
|
||||
int i, n_seqs, tot_seqs = 0;
|
||||
bwa_seq_t *seqs;
|
||||
bwa_seqio_t *ks;
|
||||
clock_t t;
|
||||
|
|
@ -190,9 +190,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
|
|||
|
||||
// core loop
|
||||
fwrite(opt, sizeof(gap_opt_t), 1, stdout);
|
||||
read_flag |= (opt->mode & BWA_MODE_COMPREAD)? 1 : 0;
|
||||
read_flag |= ((opt->mode & BWA_MODE_IL13)? 1 : 0)<<1;
|
||||
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt->trim_qual)) != 0) {
|
||||
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
|
||||
tot_seqs += n_seqs;
|
||||
t = clock();
|
||||
|
||||
|
|
@ -248,7 +246,7 @@ int bwa_aln(int argc, char *argv[])
|
|||
gap_opt_t *opt;
|
||||
|
||||
opt = gap_init_opt();
|
||||
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012I")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IB:")) >= 0) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
|
||||
|
|
@ -276,6 +274,7 @@ int bwa_aln(int argc, char *argv[])
|
|||
case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
|
||||
case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
|
||||
case 'I': opt->mode |= BWA_MODE_IL13; break;
|
||||
case 'B': opt->mode |= atoi(optarg) << 24; break;
|
||||
default: return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -303,6 +302,7 @@ int bwa_aln(int argc, char *argv[])
|
|||
fprintf(stderr, " -R INT stop searching when there are >INT equally best hits [%d]\n", opt->max_top2);
|
||||
fprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual);
|
||||
fprintf(stderr, " -f FILE file to write output to instead of stdout\n");
|
||||
fprintf(stderr, " -B INT length of barcode\n");
|
||||
fprintf(stderr, " -c input sequences are in the color space\n");
|
||||
fprintf(stderr, " -L log-scaled gap penalty for long deletions\n");
|
||||
fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n");
|
||||
|
|
|
|||
6
bwtaln.h
6
bwtaln.h
|
|
@ -74,6 +74,8 @@ typedef struct {
|
|||
bwa_cigar_t *cigar;
|
||||
// for multi-threading only
|
||||
int tid;
|
||||
// barcode
|
||||
char bc[16]; // null terminated; up to 15 bases
|
||||
// NM and MD tags
|
||||
uint32_t full_len:20, nm:12;
|
||||
char *md;
|
||||
|
|
@ -91,7 +93,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
int s_mm, s_gapo, s_gape;
|
||||
int mode;
|
||||
int mode; // bit 24-31 are the barcode length
|
||||
int indel_end_skip, max_del_occ, max_entries;
|
||||
float fnr;
|
||||
int max_diff, max_gapo, max_gape;
|
||||
|
|
@ -126,7 +128,7 @@ extern "C" {
|
|||
bwa_seqio_t *bwa_bam_open(const char *fn, int which);
|
||||
void bwa_seq_close(bwa_seqio_t *bs);
|
||||
void seq_reverse(int len, ubyte_t *seq, int is_comp);
|
||||
bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int is_comp, int trim_qual);
|
||||
bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int mode, int trim_qual);
|
||||
void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs);
|
||||
|
||||
int bwa_cal_maxdiff(int l, double err, double thres);
|
||||
|
|
|
|||
Loading…
Reference in New Issue