Casava 1.8 changed its FASTQ output: read names now contain a space, which bwa
did not parse correctly. This patch fixes the parsing and adds a -Y option that makes bwa skip reads Casava has marked as filtered, stripping the Casava tag from the read names in the output. Signed-off-by: RoelKluin <roel.kluin@gmail.com>
This commit is contained in:
parent
d11674367d
commit
36cd4f9882
19
bwaseqio.c
19
bwaseqio.c
|
|
@ -157,6 +157,25 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri
|
||||||
n_seqs = 0;
|
n_seqs = 0;
|
||||||
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
|
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
|
||||||
while ((l = kseq_read(seq)) >= 0) {
|
while ((l = kseq_read(seq)) >= 0) {
|
||||||
|
// skip reads that are marked to be filtered by Casava
|
||||||
|
if (mode & BWA_MODE_CFY) {
|
||||||
|
char *s = rindex(seq->name.s, ' ');
|
||||||
|
if (s) {
|
||||||
|
*s = '\0';
|
||||||
|
for(++s; *s != '\0'; ++s) {
|
||||||
|
if (*s == ':') {
|
||||||
|
++s;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (*s == 'Y')
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!s || *s != 'N') {
|
||||||
|
fprintf(stderr, "No Casava filter character found.\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (is_64 && seq->qual.l)
|
if (is_64 && seq->qual.l)
|
||||||
for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
|
for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
|
||||||
if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
|
if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
|
||||||
|
|
|
||||||
4
bwtaln.c
4
bwtaln.c
|
|
@ -233,7 +233,7 @@ int bwa_aln(int argc, char *argv[])
|
||||||
gap_opt_t *opt;
|
gap_opt_t *opt;
|
||||||
|
|
||||||
opt = gap_init_opt();
|
opt = gap_init_opt();
|
||||||
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IB:")) >= 0) {
|
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IYB:")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'n':
|
case 'n':
|
||||||
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
|
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
|
||||||
|
|
@ -261,6 +261,7 @@ int bwa_aln(int argc, char *argv[])
|
||||||
case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
|
case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
|
||||||
case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
|
case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
|
||||||
case 'I': opt->mode |= BWA_MODE_IL13; break;
|
case 'I': opt->mode |= BWA_MODE_IL13; break;
|
||||||
|
case 'Y': opt->mode |= BWA_MODE_CFY; break;
|
||||||
case 'B': opt->mode |= atoi(optarg) << 24; break;
|
case 'B': opt->mode |= atoi(optarg) << 24; break;
|
||||||
default: return 1;
|
default: return 1;
|
||||||
}
|
}
|
||||||
|
|
@ -298,6 +299,7 @@ int bwa_aln(int argc, char *argv[])
|
||||||
fprintf(stderr, " -0 use single-end reads only (effective with -b)\n");
|
fprintf(stderr, " -0 use single-end reads only (effective with -b)\n");
|
||||||
fprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\n");
|
fprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\n");
|
||||||
fprintf(stderr, " -2 use the 2nd read in a pair (effective with -b)\n");
|
fprintf(stderr, " -2 use the 2nd read in a pair (effective with -b)\n");
|
||||||
|
fprintf(stderr, " -Y filter Casava-filtered sequences\n");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
1
bwtaln.h
1
bwtaln.h
|
|
@ -86,6 +86,7 @@ typedef struct {
|
||||||
#define BWA_MODE_GAPE 0x01
|
#define BWA_MODE_GAPE 0x01
|
||||||
#define BWA_MODE_COMPREAD 0x02
|
#define BWA_MODE_COMPREAD 0x02
|
||||||
#define BWA_MODE_LOGGAP 0x04
|
#define BWA_MODE_LOGGAP 0x04
|
||||||
|
#define BWA_MODE_CFY 0x08
|
||||||
#define BWA_MODE_NONSTOP 0x10
|
#define BWA_MODE_NONSTOP 0x10
|
||||||
#define BWA_MODE_BAM 0x20
|
#define BWA_MODE_BAM 0x20
|
||||||
#define BWA_MODE_BAM_SE 0x40
|
#define BWA_MODE_BAM_SE 0x40
|
||||||
|
|
|
||||||
2
kseq.h
2
kseq.h
|
|
@ -102,7 +102,7 @@ typedef struct __kstring_t {
|
||||||
if (ks->buf[i] == delimiter) break; \
|
if (ks->buf[i] == delimiter) break; \
|
||||||
} else { \
|
} else { \
|
||||||
for (i = ks->begin; i < ks->end; ++i) \
|
for (i = ks->begin; i < ks->end; ++i) \
|
||||||
if (isspace(ks->buf[i])) break; \
|
if (isspace(ks->buf[i]) && (ks->buf[i] != ' ')) break; \
|
||||||
} \
|
} \
|
||||||
if (str->m - str->l < i - ks->begin + 1) { \
|
if (str->m - str->l < i - ks->begin + 1) { \
|
||||||
str->m = str->l + (i - ks->begin) + 1; \
|
str->m = str->l + (i - ks->begin) + 1; \
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue