Remove sequences marked to be filtered by Casava-1.8 with bwa aln -Y

In Casava 1.8 the FASTQ output format changed, e.g.:

@EAS139:136:FC706VJ:2:5:1000:12850 1:Y:18:ATCACG
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+
BBBBCCCC?<A?BC?7@@???????DBBA@@@@A@@

The part of the header line after the space, which bwa treats as a comment, contains the fields:
<read number>:<is filtered>:<control number>:<barcode sequence>

A `Y' in the <is filtered> field is how Casava indicates that a sequence
should be filtered. This patch adds a -Y flag to bwa aln that makes bwa skip these sequences.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
This commit is contained in:
Roel Kluin 2011-07-10 17:04:06 +02:00
parent 8f115a8e00
commit db59a605d1
3 changed files with 11 additions and 1 deletions

View File

@ -157,6 +157,13 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri
n_seqs = 0;
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
while ((l = kseq_read(seq)) >= 0) {
if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) {
// skip reads that are marked to be filtered by Casava
char *s = index(seq->comment.s, ':');
if (s && *(++s) == 'Y') {
continue;
}
}
if (is_64 && seq->qual.l)
for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length

View File

@ -233,7 +233,7 @@ int bwa_aln(int argc, char *argv[])
gap_opt_t *opt;
opt = gap_init_opt();
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IB:")) >= 0) {
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IYB:")) >= 0) {
switch (c) {
case 'n':
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
@ -261,6 +261,7 @@ int bwa_aln(int argc, char *argv[])
case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
case 'I': opt->mode |= BWA_MODE_IL13; break;
case 'Y': opt->mode |= BWA_MODE_CFY; break;
case 'B': opt->mode |= atoi(optarg) << 24; break;
default: return 1;
}
@ -298,6 +299,7 @@ int bwa_aln(int argc, char *argv[])
fprintf(stderr, " -0 use single-end reads only (effective with -b)\n");
fprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\n");
fprintf(stderr, " -2 use the 2nd read in a pair (effective with -b)\n");
fprintf(stderr, " -Y filter Casava-filtered sequences\n");
fprintf(stderr, "\n");
return 1;
}

View File

@ -86,6 +86,7 @@ typedef struct {
#define BWA_MODE_GAPE 0x01
#define BWA_MODE_COMPREAD 0x02
#define BWA_MODE_LOGGAP 0x04
#define BWA_MODE_CFY 0x08
#define BWA_MODE_NONSTOP 0x10
#define BWA_MODE_BAM 0x20
#define BWA_MODE_BAM_SE 0x40