diff --git a/bwamem.c b/bwamem.c index 99a6d88..535606b 100644 --- a/bwamem.c +++ b/bwamem.c @@ -968,6 +968,30 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a) return mapq; } +void mem_reorder_primary5(int T, mem_alnreg_v *a) +{ + int k, n_pri = 0, left_st = INT_MAX, left_k = -1; + mem_alnreg_t t; + for (k = 0; k < a->n; ++k) + if (a->a[k].secondary < 0 && !a->a[k].is_alt && a->a[k].score >= T) ++n_pri; + if (n_pri <= 1) return; // only one alignment + for (k = 0; k < a->n; ++k) { + mem_alnreg_t *p = &a->a[k]; + if (p->secondary >= 0 || p->is_alt || p->score < T) continue; + if (p->qb < left_st) left_st = p->qb, left_k = k; + } + assert(a->a[0].secondary < 0); + if (left_k == 0) return; // no need to reorder + t = a->a[0], a->a[0] = a->a[left_k], a->a[left_k] = t; + for (k = 1; k < a->n; ++k) { // update secondary and secondary_all + mem_alnreg_t *p = &a->a[k]; + if (p->secondary == 0) p->secondary = left_k; + else if (p->secondary == left_k) p->secondary = 0; + if (p->secondary_all == 0) p->secondary_all = left_k; + else if (p->secondary_all == left_k) p->secondary_all = 0; + } +} + // TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m) { @@ -1160,6 +1184,7 @@ static void worker2(void *data, int i, int tid) if (!(w->opt->flag&MEM_F_PE)) { if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name); mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); + if (w->opt->flag & MEM_F_PRIMARY5) mem_reorder_primary5(w->opt->T, &w->regs[i]); mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); free(w->regs[i].a); } else { diff --git a/bwamem.h b/bwamem.h index 730b603..4e2b1d8 100644 --- a/bwamem.h +++ b/bwamem.h @@ -19,6 +19,7 @@ typedef struct __smem_i smem_i; #define MEM_F_REF_HDR 0x100 #define MEM_F_SOFTCLIP 0x200 #define MEM_F_SMARTPE 0x400 +#define MEM_F_PRIMARY5 0x800 typedef struct { int a, b; // match score and mismatch penalty diff --git a/bwamem_pair.c b/bwamem_pair.c index b812cc0..9beb84b 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -243,6 +243,7 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons } void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m); +void mem_reorder_primary5(int T, mem_alnreg_v *a); #define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499)) @@ -275,6 +276,10 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co } n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0); n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1); + if (opt->flag & MEM_F_PRIMARY5) { + mem_reorder_primary5(opt->T, &a[0]); + mem_reorder_primary5(opt->T, &a[1]); + } if (opt->flag&MEM_F_NOPAIRING) goto no_pairing; // pairing single-end hits if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) { diff --git a/fastmap.c b/fastmap.c index 1e49ccb..7b7145d 100644 --- a/fastmap.c +++ b/fastmap.c @@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[]) aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "1paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) { + while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == '1') no_mt_io = 1; else if (c == 'x') mode = optarg; @@ -147,6 +147,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE; else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP; else if (c == 'V') opt->flag |= MEM_F_REF_HDR; + else if (c == '5') opt->flag |= MEM_F_PRIMARY5; else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1; else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1; else if (c == 'v') bwa_verbose = atoi(optarg); @@ -265,6 +266,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); fprintf(stderr, " -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null]\n"); fprintf(stderr, " -j treat ALT contigs as part of the primary assembly (i.e. ignore .alt file)\n"); + fprintf(stderr, " -5 always take the leftmost alignment on a read as primary\n"); fprintf(stderr, "\n"); fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose); fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T); diff --git a/main.c b/main.c index f936a81..b652cf0 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.15-r1140" +#define PACKAGE_VERSION "0.7.15-r1142-dirty" #endif int bwa_fa2pac(int argc, char *argv[]);