r725: optionally disable hard clipping
as is reqested by the cancer group
This commit is contained in:
parent
1c19bc630f
commit
8c12ec4a4b
2
Makefile
2
Makefile
|
|
@ -1,6 +1,6 @@
|
|||
CC= gcc
|
||||
#CC= clang --analyze
|
||||
CFLAGS= -g -Wall -O2
|
||||
CFLAGS= -g -Wall -O2 -Wno-unused-function
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||
|
|
|
|||
21
bwamem.c
21
bwamem.c
|
|
@ -689,7 +689,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar)
|
|||
return l;
|
||||
}
|
||||
|
||||
void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_)
|
||||
void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all)
|
||||
{
|
||||
int i;
|
||||
mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
|
||||
|
|
@ -716,7 +716,8 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
|||
if (p->n_cigar) { // aligned
|
||||
for (i = 0; i < p->n_cigar; ++i) {
|
||||
int c = p->cigar[i]&0xf;
|
||||
if (c == 3 || c == 4) c = which? 4 : 3; // use hard clipping for supplementary alignments
|
||||
if (!softclip_all && (c == 3 || c == 4))
|
||||
c = which? 4 : 3; // use hard clipping for supplementary alignments
|
||||
kputw(p->cigar[i]>>4, str); kputc("MIDSH"[c], str);
|
||||
}
|
||||
} else kputc('*', str); // having a coordinate but unaligned (e.g. when copy_mate is true)
|
||||
|
|
@ -743,9 +744,9 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
|||
kputsn("*\t*", 3, str);
|
||||
} else if (!p->is_rev) { // the forward strand
|
||||
int i, qb = 0, qe = s->l_seq;
|
||||
if (p->n_cigar) {
|
||||
if (which && ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3)) qb += p->cigar[0]>>4;
|
||||
if (which && ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3)) qe -= p->cigar[p->n_cigar-1]>>4;
|
||||
if (p->n_cigar && which && !softclip_all) { // have cigar && not the primary alignment && not softclip all
|
||||
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4;
|
||||
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4;
|
||||
}
|
||||
ks_resize(str, str->l + (qe - qb) + 1);
|
||||
for (i = qb; i < qe; ++i) str->s[str->l++] = "ACGTN"[(int)s->seq[i]];
|
||||
|
|
@ -757,9 +758,9 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
|
|||
} else kputc('*', str);
|
||||
} else { // the reverse strand
|
||||
int i, qb = 0, qe = s->l_seq;
|
||||
if (p->n_cigar) {
|
||||
if (which && ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3)) qe -= p->cigar[0]>>4;
|
||||
if (which && ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3)) qb += p->cigar[p->n_cigar-1]>>4;
|
||||
if (p->n_cigar && which && !softclip_all) {
|
||||
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4;
|
||||
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4;
|
||||
}
|
||||
ks_resize(str, str->l + (qe - qb) + 1);
|
||||
for (i = qe-1; i >= qb; --i) str->s[str->l++] = "TGCAN"[(int)s->seq[i]];
|
||||
|
|
@ -860,10 +861,10 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
|
|||
mem_aln_t t;
|
||||
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
|
||||
t.flag |= extra_flag;
|
||||
mem_aln2sam(bns, &str, s, 1, &t, 0, m);
|
||||
mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP);
|
||||
} else {
|
||||
for (k = 0; k < aa.n; ++k)
|
||||
mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m);
|
||||
mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP);
|
||||
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
|
||||
free(aa.a);
|
||||
}
|
||||
|
|
|
|||
1
bwamem.h
1
bwamem.h
|
|
@ -16,6 +16,7 @@ typedef struct __smem_i smem_i;
|
|||
#define MEM_F_ALL 0x8
|
||||
#define MEM_F_NO_MULTI 0x10
|
||||
#define MEM_F_NO_RESCUE 0x20
|
||||
#define MEM_F_SOFTCLIP 0x200
|
||||
|
||||
typedef struct {
|
||||
int a, b, q, r; // match score, mismatch penalty and gap open/extension penalty. A gap of size k costs q+k*r
|
||||
|
|
|
|||
|
|
@ -238,7 +238,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
|
||||
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
||||
extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
|
||||
extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
|
||||
extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all);
|
||||
|
||||
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
|
||||
kstring_t str;
|
||||
|
|
@ -301,8 +301,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
// write SAM
|
||||
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag;
|
||||
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag;
|
||||
mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0;
|
||||
mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s;
|
||||
mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP); s[0].sam = strdup(str.s); str.l = 0;
|
||||
mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP); s[1].sam = str.s;
|
||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
free(h[0].cigar); free(h[1].cigar);
|
||||
} else goto no_pairing;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ int main_mem(int argc, char *argv[])
|
|||
int64_t n_processed = 0;
|
||||
|
||||
opt = mem_opt_init();
|
||||
while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "paMCSPHIk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||
else if (c == 'w') opt->w = atoi(optarg);
|
||||
else if (c == 'A') opt->a = atoi(optarg);
|
||||
|
|
@ -45,6 +45,7 @@ int main_mem(int argc, char *argv[])
|
|||
else if (c == 'p') opt->flag |= MEM_F_PE;
|
||||
else if (c == 'M') opt->flag |= MEM_F_NO_MULTI;
|
||||
else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE;
|
||||
else if (c == 'I') opt->flag |= MEM_F_SOFTCLIP;
|
||||
else if (c == 'c') opt->max_occ = atoi(optarg);
|
||||
else if (c == 'd') opt->zdrop = atoi(optarg);
|
||||
else if (c == 'v') bwa_verbose = atoi(optarg);
|
||||
|
|
@ -93,6 +94,7 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose);
|
||||
fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T);
|
||||
fprintf(stderr, " -I use soft clipping for supplementary alignments\n");
|
||||
fprintf(stderr, " -a output all alignments for SE or unpaired PE\n");
|
||||
fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n");
|
||||
fprintf(stderr, " -M mark shorter split hits as secondary (for Picard/GATK compatibility)\n");
|
||||
|
|
|
|||
Loading…
Reference in New Issue