r132: optionally copy FASTA/Q comment to SAM

This commit is contained in:
Heng Li 2012-10-26 12:54:32 -04:00
parent 3abfd0743a
commit 292f9061ab
4 changed files with 15 additions and 5 deletions

View File

@ -12,7 +12,7 @@
#define BSW2_FLAG_RESCUED 0x800 #define BSW2_FLAG_RESCUED 0x800
typedef struct { typedef struct {
int skip_sw:16, hard_clip:16; int skip_sw:8, cpy_cmt:8, hard_clip:16;
int a, b, q, r, t, qr, bw, max_ins, max_chain_gap; int a, b, q, r, t, qr, bw, max_ins, max_chain_gap;
int z, is, t_seeds, multi_2nd; int z, is, t_seeds, multi_2nd;
float mask_level, coef; float mask_level, coef;
@ -45,7 +45,7 @@ typedef struct {
typedef struct { typedef struct {
int l, tid; int l, tid;
char *name, *seq, *qual, *sam; char *name, *seq, *qual, *sam, *comment;
} bsw2seq1_t; } bsw2seq1_t;
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -55,6 +55,7 @@ bsw2opt_t *bsw2_init_opt()
o->mask_level = 0.50f; o->coef = 5.5f; o->mask_level = 0.50f; o->coef = 5.5f;
o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000; o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000;
o->max_chain_gap = 10000; o->max_chain_gap = 10000;
o->cpy_cmt = 0;
return o; return o;
} }
@ -551,7 +552,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
if (p->flag&0x10) kputc(ks->qual[ks->l - 1 - j], &str); if (p->flag&0x10) kputc(ks->qual[ks->l - 1 - j], &str);
else kputc(ks->qual[j], &str); else kputc(ks->qual[j], &str);
} }
} else ksprintf(&str, "\t*"); } else kputs("\t*", &str);
// print optional tags // print optional tags
ksprintf(&str, "\tAS:i:%d\tXS:i:%d\tXF:i:%d\tXE:i:%d\tNM:i:%d", p->G, p->G2, p->flag>>16, p->n_seeds, q->nm); ksprintf(&str, "\tAS:i:%d\tXS:i:%d\tXF:i:%d\tXE:i:%d\tNM:i:%d", p->G, p->G2, p->flag>>16, p->n_seeds, q->nm);
if (q->nn) ksprintf(&str, "\tXN:i:%d", q->nn); if (q->nn) ksprintf(&str, "\tXN:i:%d", q->nn);
@ -559,6 +560,12 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
if (p->flag&BSW2_FLAG_MATESW) type |= 1; if (p->flag&BSW2_FLAG_MATESW) type |= 1;
if (p->flag&BSW2_FLAG_TANDEM) type |= 2; if (p->flag&BSW2_FLAG_TANDEM) type |= 2;
if (type) ksprintf(&str, "\tXT:i:%d", type); if (type) ksprintf(&str, "\tXT:i:%d", type);
if (opt->cpy_cmt && ks->comment) {
int l = strlen(ks->comment);
if (l >= 6 && ks->comment[2] == ':' && ks->comment[4] == ':') {
kputc('\t', &str); kputs(ks->comment, &str);
}
}
kputc('\n', &str); kputc('\n', &str);
} }
ks->sam = str.s; ks->sam = str.s;
@ -756,6 +763,7 @@ static void kseq_to_bsw2seq(const kseq_t *ks, bsw2seq1_t *p)
p->name = strdup(ks->name.s); p->name = strdup(ks->name.s);
p->seq = strdup(ks->seq.s); p->seq = strdup(ks->seq.s);
p->qual = ks->qual.l? strdup(ks->qual.s) : 0; p->qual = ks->qual.l? strdup(ks->qual.s) : 0;
p->comment = ks->comment.l? strdup(ks->comment.s) : 0;
p->sam = 0; p->sam = 0;
} }

View File

@ -18,7 +18,7 @@ int bwa_bwtsw2(int argc, char *argv[])
opt = bsw2_init_opt(); opt = bsw2_init_opt();
srand48(11); srand48(11);
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:")) >= 0) { while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:C")) >= 0) {
switch (c) { switch (c) {
case 'q': opt->q = atoi(optarg); break; case 'q': opt->q = atoi(optarg); break;
case 'r': opt->r = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break;
@ -37,6 +37,7 @@ int bwa_bwtsw2(int argc, char *argv[])
case 'f': xreopen(optarg, "w", stdout); break; case 'f': xreopen(optarg, "w", stdout); break;
case 'I': opt->max_ins = atoi(optarg); break; case 'I': opt->max_ins = atoi(optarg); break;
case 'S': opt->skip_sw = 1; break; case 'S': opt->skip_sw = 1; break;
case 'C': opt->cpy_cmt = 1; break;
case 'G': opt->max_chain_gap = atoi(optarg); break; case 'G': opt->max_chain_gap = atoi(optarg); break;
} }
} }
@ -55,6 +56,7 @@ int bwa_bwtsw2(int argc, char *argv[])
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
fprintf(stderr, " -f FILE file to output results to instead of stdout\n"); fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n"); fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n");
fprintf(stderr, " -C copy FASTA/Q comment to SAM output\n");
fprintf(stderr, " -M mark multi-part alignments as secondary\n"); fprintf(stderr, " -M mark multi-part alignments as secondary\n");
fprintf(stderr, " -S skip Smith-Waterman read pairing\n"); fprintf(stderr, " -S skip Smith-Waterman read pairing\n");
fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins); fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h" #include "utils.h"
#ifndef PACKAGE_VERSION #ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.6.2-r131" #define PACKAGE_VERSION "0.6.2-r132"
#endif #endif
static int usage() static int usage()