From 5ead86acd35a7703b36794fbf04973e391014ea7 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 25 Feb 2013 11:18:35 -0500 Subject: [PATCH] optionally mark split hit as secondary --- bwamem.c | 14 +++++++++----- bwamem.h | 1 + fastmap.c | 33 +++++++++++++++++++-------------- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/bwamem.c b/bwamem.c index edabd38..2d326e2 100644 --- a/bwamem.c +++ b/bwamem.c @@ -515,13 +515,15 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons p->flag |= m && m->rb >= bns->l_pac? 0x20 : 0; // is mate on reverse strand kputs(s->name, str); kputc('\t', str); if (is_mapped(p)) { // has a coordinate, no matter whether it is mapped or copied from the mate + int sam_flag = p->flag&0xff; // the flag that will be outputed to SAM; it is not always the same as p->flag + if (sam_flag&0x10000) sam_flag |= 0x100; if (!copy_mate) { cigar = bwa_gen_cigar(mat, q, r, w, bns->l_pac, pac, p->qe - p->qb, (uint8_t*)&s->seq[p->qb], p->rb, p->re, &score, &n_cigar); p->flag |= n_cigar == 0? 4 : 0; // FIXME: check why this may happen (this has already happened) } else n_cigar = 0, cigar = 0; pos = bns_depos(bns, p->rb < bns->l_pac? p->rb : p->re - 1, &is_rev); bns_cnt_ambi(bns, pos, p->re - p->rb, &rid); - kputw(p->flag, str); kputc('\t', str); + kputw(sam_flag, str); kputc('\t', str); kputs(bns->anns[rid].name, str); kputc('\t', str); kputuw(pos - bns->anns[rid].offset + 1, str); kputc('\t', str); kputw(p->qual, str); kputc('\t', str); if (n_cigar) { @@ -626,11 +628,13 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b int mapq0 = -1; for (k = 0; k < a->n; ++k) { bwahit_t h; - if (a->a[k].secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue; - if (a->a[k].secondary >= 0 && a->a[k].score < a->a[a->a[k].secondary].score * .5) continue; - mem_alnreg2hit(&a->a[k], &h); + mem_alnreg_t *p = &a->a[k]; + if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue; + if (p->secondary >= 0 && p->score < a->a[p->secondary].score * .5) continue; + mem_alnreg2hit(p, &h); h.flag |= extra_flag; - h.qual = a->a[k].secondary >= 0? 0 : mem_approx_mapq_se(opt, &a->a[k]); + if ((opt->flag&MEM_F_NO_MULTI) && k && p->secondary < 0) h.flag |= 0x10000; // print the sequence, but flag as secondary (for Picard) + h.qual = p->secondary >= 0? 0 : mem_approx_mapq_se(opt, p); if (k == 0) mapq0 = h.qual; else if (h.qual > mapq0) h.qual = mapq0; bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m); diff --git a/bwamem.h b/bwamem.h index 5cf3ac5..6ab2b01 100644 --- a/bwamem.h +++ b/bwamem.h @@ -15,6 +15,7 @@ typedef struct __smem_i smem_i; #define MEM_F_PE 0x2 #define MEM_F_NOPAIRING 0x4 #define MEM_F_ALL 0x8 +#define MEM_F_NO_MULTI 0x16 typedef struct { int a, b, q, r, w; diff --git a/fastmap.c b/fastmap.c index 4cf92b2..72aea0b 100644 --- a/fastmap.c +++ b/fastmap.c @@ -26,13 +26,14 @@ int main_mem(int argc, char *argv[]) void *ko = 0, *ko2 = 0; opt = mem_opt_init(); - while ((c = getopt(argc, argv, "paCPHk:c:v:s:r:t:R:")) >= 0) { + while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1; else if (c == 'P') opt->flag |= MEM_F_NOPAIRING; else if (c == 'H') opt->flag |= MEM_F_HARDCLIP; else if (c == 'a') opt->flag |= MEM_F_ALL; else if (c == 'p') opt->flag |= MEM_F_PE; + else if (c == 'M') opt->flag |= MEM_F_NO_MULTI; else if (c == 'c') opt->max_occ = atoi(optarg); else if (c == 'v') bwa_verbose = atoi(optarg); else if (c == 'r') opt->split_factor = atof(optarg); @@ -43,19 +44,23 @@ int main_mem(int argc, char *argv[]) } if (optind + 1 >= argc) { fprintf(stderr, "\n"); - fprintf(stderr, "Usage: bwa mem [options] [in2.fq]\n\n"); - fprintf(stderr, "Options: -k INT minimum seed length [%d]\n", opt->min_seed_len); - fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); - fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); - fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); - fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor); - fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); - fprintf(stderr, " -v INT verbose level [%d]\n", bwa_verbose); - fprintf(stderr, " -a output all alignments for SE or unpaired PE\n"); - fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); - fprintf(stderr, " -P perform mate SW only but skip pairing\n"); - fprintf(stderr, " -H hard clipping\n"); - fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n"); + fprintf(stderr, "Usage: bwa mem [options] [in2.fq]\n\n"); + fprintf(stderr, "Algorithm options:\n\n"); + fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); + fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len); + fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor); + fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); + fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); + fprintf(stderr, " -P skip pairing; perform mate SW only\n"); + fprintf(stderr, "\nInput/output options:\n\n"); + fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); + fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose); + fprintf(stderr, " -a output all alignments for SE or unpaired PE\n"); + fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n"); + fprintf(stderr, " -H hard clipping\n"); + fprintf(stderr, " -M mark shorter split hits as secondary (for Picard/GATK compatibility)\n"); fprintf(stderr, "\n"); free(opt); return 1;