From 292f9061ab9c4b8d0c3d088f60513a84413433dc Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 26 Oct 2012 12:54:32 -0400 Subject: [PATCH] r132: optionally copy FASTA/Q comment to SAM --- bwtsw2.h | 4 ++-- bwtsw2_aux.c | 10 +++++++++- bwtsw2_main.c | 4 +++- main.c | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/bwtsw2.h b/bwtsw2.h index b1f6a3f..0ec9676 100644 --- a/bwtsw2.h +++ b/bwtsw2.h @@ -12,7 +12,7 @@ #define BSW2_FLAG_RESCUED 0x800 typedef struct { - int skip_sw:16, hard_clip:16; + int skip_sw:8, cpy_cmt:8, hard_clip:16; int a, b, q, r, t, qr, bw, max_ins, max_chain_gap; int z, is, t_seeds, multi_2nd; float mask_level, coef; @@ -45,7 +45,7 @@ typedef struct { typedef struct { int l, tid; - char *name, *seq, *qual, *sam; + char *name, *seq, *qual, *sam, *comment; } bsw2seq1_t; #ifdef __cplusplus diff --git a/bwtsw2_aux.c b/bwtsw2_aux.c index f6f6df8..5e8161c 100644 --- a/bwtsw2_aux.c +++ b/bwtsw2_aux.c @@ -55,6 +55,7 @@ bsw2opt_t *bsw2_init_opt() o->mask_level = 0.50f; o->coef = 5.5f; o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000; o->max_chain_gap = 10000; + o->cpy_cmt = 0; return o; } @@ -551,7 +552,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks if (p->flag&0x10) kputc(ks->qual[ks->l - 1 - j], &str); else kputc(ks->qual[j], &str); } - } else ksprintf(&str, "\t*"); + } else kputs("\t*", &str); // print optional tags ksprintf(&str, "\tAS:i:%d\tXS:i:%d\tXF:i:%d\tXE:i:%d\tNM:i:%d", p->G, p->G2, p->flag>>16, p->n_seeds, q->nm); if (q->nn) ksprintf(&str, "\tXN:i:%d", q->nn); @@ -559,6 +560,12 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks if (p->flag&BSW2_FLAG_MATESW) type |= 1; if (p->flag&BSW2_FLAG_TANDEM) type |= 2; if (type) ksprintf(&str, "\tXT:i:%d", type); + if (opt->cpy_cmt && ks->comment) { + int l = strlen(ks->comment); + if (l >= 6 && ks->comment[2] == ':' && ks->comment[4] == ':') { + kputc('\t', &str); kputs(ks->comment, &str); + } + } kputc('\n', &str); } ks->sam = str.s; @@ -756,6 +763,7 @@ static void kseq_to_bsw2seq(const kseq_t *ks, bsw2seq1_t *p) p->name = strdup(ks->name.s); p->seq = strdup(ks->seq.s); p->qual = ks->qual.l? strdup(ks->qual.s) : 0; + p->comment = ks->comment.l? strdup(ks->comment.s) : 0; p->sam = 0; } diff --git a/bwtsw2_main.c b/bwtsw2_main.c index a802ee7..e3f57f8 100644 --- a/bwtsw2_main.c +++ b/bwtsw2_main.c @@ -18,7 +18,7 @@ int bwa_bwtsw2(int argc, char *argv[]) opt = bsw2_init_opt(); srand48(11); - while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:")) >= 0) { + while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:C")) >= 0) { switch (c) { case 'q': opt->q = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break; @@ -37,6 +37,7 @@ int bwa_bwtsw2(int argc, char *argv[]) case 'f': xreopen(optarg, "w", stdout); break; case 'I': opt->max_ins = atoi(optarg); break; case 'S': opt->skip_sw = 1; break; + case 'C': opt->cpy_cmt = 1; break; case 'G': opt->max_chain_gap = atoi(optarg); break; } } @@ -55,6 +56,7 @@ int bwa_bwtsw2(int argc, char *argv[]) fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -f FILE file to output results to instead of stdout\n"); fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n"); + fprintf(stderr, " -C copy FASTA/Q comment to SAM output\n"); fprintf(stderr, " -M mark multi-part alignments as secondary\n"); fprintf(stderr, " -S skip Smith-Waterman read pairing\n"); fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins); diff --git a/main.c b/main.c index e394210..73cbcd9 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.6.2-r131" +#define PACKAGE_VERSION "0.6.2-r132" #endif static int usage()