Merge branch 'dev'
This commit is contained in:
commit
b9026ecafd
27
bwamem.c
27
bwamem.c
|
|
@ -78,6 +78,7 @@ mem_opt_t *mem_opt_init()
|
||||||
o->min_chain_weight = 0;
|
o->min_chain_weight = 0;
|
||||||
o->max_chain_extend = 1<<30;
|
o->max_chain_extend = 1<<30;
|
||||||
o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len);
|
o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len);
|
||||||
|
o->min_pa_ratio = 0.8;
|
||||||
bwa_fill_scmat(o->a, o->b, o->mat);
|
bwa_fill_scmat(o->a, o->b, o->mat);
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
@ -514,14 +515,18 @@ int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id
|
||||||
int_v z = {0,0,0};
|
int_v z = {0,0,0};
|
||||||
if (n == 0) return 0;
|
if (n == 0) return 0;
|
||||||
for (i = n_pri = 0; i < n; ++i) {
|
for (i = n_pri = 0; i < n; ++i) {
|
||||||
a[i].sub = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i);
|
a[i].sub = 0, a[i].alt_sc = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i);
|
||||||
if (!a[i].is_alt) ++n_pri;
|
if (!a[i].is_alt) ++n_pri;
|
||||||
}
|
}
|
||||||
ks_introsort(mem_ars_hash, n, a);
|
ks_introsort(mem_ars_hash, n, a);
|
||||||
mem_mark_primary_se_core(opt, n, a, &z);
|
mem_mark_primary_se_core(opt, n, a, &z);
|
||||||
for (i = 0; i < n; ++i) // don't track the "parent" hit of ALT secondary hits
|
for (i = 0; i < n; ++i) {
|
||||||
if (a[i].is_alt && a[i].secondary >= 0)
|
mem_alnreg_t *p = &a[i];
|
||||||
a[i].secondary = INT_MAX;
|
if (p->is_alt && p->secondary >= 0) // don't track the "parent" hit of ALT secondary hits
|
||||||
|
p->secondary = INT_MAX;
|
||||||
|
if (!p->is_alt && p->secondary >= 0 && a[p->secondary].is_alt)
|
||||||
|
p->alt_sc = a[p->secondary].score;
|
||||||
|
}
|
||||||
if (n_pri > 0 || n_pri != n) {
|
if (n_pri > 0 || n_pri != n) {
|
||||||
ks_introsort(mem_ars_hash2, n, a);
|
ks_introsort(mem_ars_hash2, n, a);
|
||||||
for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1;
|
for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1;
|
||||||
|
|
@ -808,7 +813,7 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
|
||||||
if (p->rid >= 0) { // with coordinate
|
if (p->rid >= 0) { // with coordinate
|
||||||
kputs(bns->anns[p->rid].name, str); kputc('\t', str); // RNAME
|
kputs(bns->anns[p->rid].name, str); kputc('\t', str); // RNAME
|
||||||
kputl(p->pos + 1, str); kputc('\t', str); // POS
|
kputl(p->pos + 1, str); kputc('\t', str); // POS
|
||||||
kputw(p->mapq, str); kputc('\t', str); // MAPQ
|
kputw(p->score < opt->min_pa_ratio * p->alt_sc? 0 : p->mapq, str); kputc('\t', str); // MAPQ
|
||||||
if (p->n_cigar) { // aligned
|
if (p->n_cigar) { // aligned
|
||||||
for (i = 0; i < p->n_cigar; ++i) {
|
for (i = 0; i < p->n_cigar; ++i) {
|
||||||
int c = p->cigar[i]&0xf;
|
int c = p->cigar[i]&0xf;
|
||||||
|
|
@ -880,13 +885,11 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
|
||||||
for (i = 0; i < n; ++i)
|
for (i = 0; i < n; ++i)
|
||||||
if (i != which && !(list[i].flag&0x100)) break;
|
if (i != which && !(list[i].flag&0x100)) break;
|
||||||
if (i < n) { // there are other primary hits; output them
|
if (i < n) { // there are other primary hits; output them
|
||||||
int pri_alt_sc = -1;
|
|
||||||
kputsn("\tSA:Z:", 6, str);
|
kputsn("\tSA:Z:", 6, str);
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
const mem_aln_t *r = &list[i];
|
const mem_aln_t *r = &list[i];
|
||||||
int k;
|
int k;
|
||||||
if (i == which || (list[i].flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
|
if (i == which || (r->flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
|
||||||
if (list[i].is_alt) pri_alt_sc = pri_alt_sc > r->score? pri_alt_sc : r->score;
|
|
||||||
kputs(bns->anns[r->rid].name, str); kputc(',', str);
|
kputs(bns->anns[r->rid].name, str); kputc(',', str);
|
||||||
kputl(r->pos+1, str); kputc(',', str);
|
kputl(r->pos+1, str); kputc(',', str);
|
||||||
kputc("+-"[r->is_rev], str); kputc(',', str);
|
kputc("+-"[r->is_rev], str); kputc(',', str);
|
||||||
|
|
@ -897,9 +900,11 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
|
||||||
kputc(',', str); kputw(r->NM, str);
|
kputc(',', str); kputw(r->NM, str);
|
||||||
kputc(';', str);
|
kputc(';', str);
|
||||||
}
|
}
|
||||||
if (pri_alt_sc > 0)
|
|
||||||
ksprintf(str, "\tpa:f:%.3f", (double)p->score / pri_alt_sc);
|
|
||||||
}
|
}
|
||||||
|
if (p->alt_sc > 0)
|
||||||
|
ksprintf(str, "\tpa:f:%.3f", (double)p->score / p->alt_sc);
|
||||||
|
if (p->score < opt->min_pa_ratio * p->alt_sc)
|
||||||
|
ksprintf(str, "\tom:i:%d", p->mapq);
|
||||||
}
|
}
|
||||||
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
|
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
|
||||||
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
|
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
|
||||||
|
|
@ -1098,7 +1103,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
|
||||||
assert(a.rid == ar->rid);
|
assert(a.rid == ar->rid);
|
||||||
a.pos = pos - bns->anns[a.rid].offset;
|
a.pos = pos - bns->anns[a.rid].offset;
|
||||||
a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub;
|
a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub;
|
||||||
a.is_alt = ar->is_alt;
|
a.is_alt = ar->is_alt; a.alt_sc = ar->alt_sc;
|
||||||
free(query);
|
free(query);
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
4
bwamem.h
4
bwamem.h
|
|
@ -48,6 +48,7 @@ typedef struct {
|
||||||
float XA_drop_ratio; // when counting hits for the XA tag, ignore alignments with score < XA_drop_ratio * max_score; only effective for the XA tag
|
float XA_drop_ratio; // when counting hits for the XA tag, ignore alignments with score < XA_drop_ratio * max_score; only effective for the XA tag
|
||||||
float mask_level_redun;
|
float mask_level_redun;
|
||||||
float mapQ_coef_len;
|
float mapQ_coef_len;
|
||||||
|
float min_pa_ratio;
|
||||||
int mapQ_coef_fac;
|
int mapQ_coef_fac;
|
||||||
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
|
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
|
||||||
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
||||||
|
|
@ -62,6 +63,7 @@ typedef struct {
|
||||||
int score; // best local SW score
|
int score; // best local SW score
|
||||||
int truesc; // actual score corresponding to the aligned region; possibly smaller than $score
|
int truesc; // actual score corresponding to the aligned region; possibly smaller than $score
|
||||||
int sub; // 2nd best SW score
|
int sub; // 2nd best SW score
|
||||||
|
int alt_sc;
|
||||||
int csub; // SW score of a tandem hit
|
int csub; // SW score of a tandem hit
|
||||||
int sub_n; // approximate number of suboptimal hits
|
int sub_n; // approximate number of suboptimal hits
|
||||||
int w; // actual band width used in extension
|
int w; // actual band width used in extension
|
||||||
|
|
@ -90,7 +92,7 @@ typedef struct { // This struct is only used for the convenience of API.
|
||||||
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
|
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
|
||||||
char *XA; // alternative mappings
|
char *XA; // alternative mappings
|
||||||
|
|
||||||
int score, sub;
|
int score, sub, alt_sc;
|
||||||
} mem_aln_t;
|
} mem_aln_t;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -240,6 +240,19 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
|
||||||
|
|
||||||
|
static inline void mem_reg2sam_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, kstring_t *str, bseq1_t *s, mem_alnreg_t *p, int extra_flag, const mem_aln_t *m)
|
||||||
|
{ // a simplified version of mem_reg2sam()
|
||||||
|
mem_aln_t t;
|
||||||
|
if (p->score < opt->T || p->secondary >= 0 || !p->is_alt) return;
|
||||||
|
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p);
|
||||||
|
t.flag |= extra_flag;
|
||||||
|
t.flag |= (opt->flag&MEM_F_NO_MULTI)? 0x10000 : 0x800;
|
||||||
|
mem_aln2sam(opt, bns, str, s, 1, &t, 0, m);
|
||||||
|
free(t.cigar);
|
||||||
|
}
|
||||||
|
|
||||||
#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))
|
#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))
|
||||||
|
|
||||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
||||||
|
|
@ -247,7 +260,6 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
||||||
extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
|
extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
|
||||||
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
||||||
extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
|
extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
|
||||||
extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
|
|
||||||
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);
|
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);
|
||||||
|
|
||||||
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2];
|
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2];
|
||||||
|
|
@ -327,8 +339,12 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
||||||
// write SAM
|
// write SAM
|
||||||
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
|
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
|
||||||
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
|
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
|
||||||
mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0;
|
mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); // write the primary hit for read1
|
||||||
mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s;
|
if (n_pri[0] < a[0].n) mem_reg2sam_alt(opt, bns, pac, &str, &s[0], &a[0].a[n_pri[0]], 0x41|extra_flag, &h[1]);
|
||||||
|
s[0].sam = strdup(str.s); str.l = 0;
|
||||||
|
mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); // write the primary hit for read2
|
||||||
|
if (n_pri[1] < a[1].n) mem_reg2sam_alt(opt, bns, pac, &str, &s[1], &a[1].a[n_pri[1]], 0x81|extra_flag, &h[0]);
|
||||||
|
s[1].sam = str.s;
|
||||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||||
free(h[0].cigar); // h[0].XA will be freed in the following block
|
free(h[0].cigar); // h[0].XA will be freed in the following block
|
||||||
free(h[1].cigar);
|
free(h[1].cigar);
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ int main_mem(int argc, char *argv[])
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
memset(&opt0, 0, sizeof(mem_opt_t));
|
memset(&opt0, 0, sizeof(mem_opt_t));
|
||||||
while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:")) >= 0) {
|
while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:g:")) >= 0) {
|
||||||
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
||||||
else if (c == 'x') mode = optarg;
|
else if (c == 'x') mode = optarg;
|
||||||
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
||||||
|
|
@ -85,6 +85,7 @@ int main_mem(int argc, char *argv[])
|
||||||
else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
|
else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
|
||||||
else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
|
else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
|
||||||
else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1;
|
else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1;
|
||||||
|
else if (c == 'g') opt->min_pa_ratio = atof(optarg), opt0.min_pa_ratio = 1;
|
||||||
else if (c == 'C') copy_comment = 1;
|
else if (c == 'C') copy_comment = 1;
|
||||||
else if (c == 'K') fixed_chunk_size = atoi(optarg);
|
else if (c == 'K') fixed_chunk_size = atoi(optarg);
|
||||||
else if (c == 'Q') {
|
else if (c == 'Q') {
|
||||||
|
|
@ -138,7 +139,7 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w);
|
fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w);
|
||||||
fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop);
|
fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop);
|
||||||
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
|
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
|
||||||
fprintf(stderr, " -y INT find MEMs longer than {-k} * {-r} with size less than INT [%ld]\n", (long)opt->max_mem_intv);
|
fprintf(stderr, " -y INT find MEMs longer than {-k} * {-r} with size less than INT (EXPERIMENTAL) [%ld]\n", (long)opt->max_mem_intv);
|
||||||
// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
|
// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
|
||||||
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
|
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
|
||||||
fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->drop_ratio);
|
fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->drop_ratio);
|
||||||
|
|
@ -164,6 +165,7 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
|
fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose);
|
fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose);
|
||||||
|
fprintf(stderr, " -g FLOAT set mapQ to zero if the ratio of the primary-to-alt scores below FLOAT [%.3f]\n", opt->min_pa_ratio);
|
||||||
fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T);
|
fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T);
|
||||||
fprintf(stderr, " -h INT if there are <INT hits with score >80%% of the max score, output all in XA [%d]\n", opt->max_hits);
|
fprintf(stderr, " -h INT if there are <INT hits with score >80%% of the max score, output all in XA [%d]\n", opt->max_hits);
|
||||||
fprintf(stderr, " -a output all alignments for SE or unpaired PE\n");
|
fprintf(stderr, " -a output all alignments for SE or unpaired PE\n");
|
||||||
|
|
|
||||||
2
main.c
2
main.c
|
|
@ -4,7 +4,7 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#ifndef PACKAGE_VERSION
|
#ifndef PACKAGE_VERSION
|
||||||
#define PACKAGE_VERSION "0.7.10-r849-dirty"
|
#define PACKAGE_VERSION "0.7.10-r855-dirty"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int bwa_fa2pac(int argc, char *argv[]);
|
int bwa_fa2pac(int argc, char *argv[]);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue