From 8c12ec4a4b42755a5f9535de33c09b88958dc122 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 24 Apr 2014 11:56:43 -0400 Subject: [PATCH] r725: optionally disable hard clipping as is reqested by the cancer group --- Makefile | 2 +- bwamem.c | 21 +++++++++++---------- bwamem.h | 1 + bwamem_pair.c | 6 +++--- fastmap.c | 4 +++- main.c | 2 +- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index ff48a20..6490932 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ CC= gcc #CC= clang --analyze -CFLAGS= -g -Wall -O2 +CFLAGS= -g -Wall -O2 -Wno-unused-function WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS AR= ar DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) diff --git a/bwamem.c b/bwamem.c index 19ca561..6576475 100644 --- a/bwamem.c +++ b/bwamem.c @@ -689,7 +689,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar) return l; } -void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_) +void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all) { int i; mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert @@ -716,7 +716,8 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m if (p->n_cigar) { // aligned for (i = 0; i < p->n_cigar; ++i) { int c = p->cigar[i]&0xf; - if (c == 3 || c == 4) c = which? 4 : 3; // use hard clipping for supplementary alignments + if (!softclip_all && (c == 3 || c == 4)) + c = which? 4 : 3; // use hard clipping for supplementary alignments kputw(p->cigar[i]>>4, str); kputc("MIDSH"[c], str); } } else kputc('*', str); // having a coordinate but unaligned (e.g. when copy_mate is true) @@ -743,9 +744,9 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m kputsn("*\t*", 3, str); } else if (!p->is_rev) { // the forward strand int i, qb = 0, qe = s->l_seq; - if (p->n_cigar) { - if (which && ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3)) qb += p->cigar[0]>>4; - if (which && ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3)) qe -= p->cigar[p->n_cigar-1]>>4; + if (p->n_cigar && which && !softclip_all) { // have cigar && not the primary alignment && not softclip all + if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4; + if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4; } ks_resize(str, str->l + (qe - qb) + 1); for (i = qb; i < qe; ++i) str->s[str->l++] = "ACGTN"[(int)s->seq[i]]; @@ -757,9 +758,9 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m } else kputc('*', str); } else { // the reverse strand int i, qb = 0, qe = s->l_seq; - if (p->n_cigar) { - if (which && ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3)) qe -= p->cigar[0]>>4; - if (which && ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3)) qb += p->cigar[p->n_cigar-1]>>4; + if (p->n_cigar && which && !softclip_all) { + if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4; + if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4; } ks_resize(str, str->l + (qe - qb) + 1); for (i = qe-1; i >= qb; --i) str->s[str->l++] = "TGCAN"[(int)s->seq[i]]; @@ -860,10 +861,10 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa mem_aln_t t; t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0); t.flag |= extra_flag; - mem_aln2sam(bns, &str, s, 1, &t, 0, m); + mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP); } else { for (k = 0; k < aa.n; ++k) - mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m); + mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP); for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar); free(aa.a); } diff --git a/bwamem.h b/bwamem.h index 8b24c51..a357965 100644 --- a/bwamem.h +++ b/bwamem.h @@ -16,6 +16,7 @@ typedef struct __smem_i smem_i; #define MEM_F_ALL 0x8 #define MEM_F_NO_MULTI 0x10 #define MEM_F_NO_RESCUE 0x20 +#define MEM_F_SOFTCLIP 0x200 typedef struct { int a, b, q, r; // match score, mismatch penalty and gap open/extension penalty. A gap of size k costs q+k*r diff --git a/bwamem_pair.c b/bwamem_pair.c index f1aa73a..eb17011 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -238,7 +238,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id); extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a); extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m); - extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m); + extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all); int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1; kstring_t str; @@ -301,8 +301,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co // write SAM h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; - mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0; - mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s; + mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP); s[0].sam = strdup(str.s); str.l = 0; + mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP); s[1].sam = str.s; if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); free(h[0].cigar); free(h[1].cigar); } else goto no_pairing; diff --git a/fastmap.c b/fastmap.c index 72d850c..daa6160 100644 --- a/fastmap.c +++ b/fastmap.c @@ -30,7 +30,7 @@ int main_mem(int argc, char *argv[]) int64_t n_processed = 0; opt = mem_opt_init(); - while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) { + while ((c = getopt(argc, argv, "paMCSPHIk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); else if (c == 'w') opt->w = atoi(optarg); else if (c == 'A') opt->a = atoi(optarg); @@ -45,6 +45,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'p') opt->flag |= MEM_F_PE; else if (c == 'M') opt->flag |= MEM_F_NO_MULTI; else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE; + else if (c == 'I') opt->flag |= MEM_F_SOFTCLIP; else if (c == 'c') opt->max_occ = atoi(optarg); else if (c == 'd') opt->zdrop = atoi(optarg); else if (c == 'v') bwa_verbose = atoi(optarg); @@ -93,6 +94,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "\n"); fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose); fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T); + fprintf(stderr, " -I use soft clipping for supplementary alignments\n"); fprintf(stderr, " -a output all alignments for SE or unpaired PE\n"); fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n"); fprintf(stderr, " -M mark shorter split hits as secondary (for Picard/GATK compatibility)\n"); diff --git a/main.c b/main.c index a8df9c0..6a7242a 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.7-r441" +#define PACKAGE_VERSION "0.7.7-r725-patch" #endif int bwa_fa2pac(int argc, char *argv[]);