r849: the pa tag now gives a number
... which is the ratio of this hit to the best ALT hit.
This commit is contained in:
parent
6f37c14f26
commit
825ae92e58
6
NEWS.md
6
NEWS.md
|
|
@ -12,8 +12,10 @@ having ALT contigs almost has not effect on alignments to the primary assembly
|
||||||
(seeding may be affected in rare corner cases). At the same time, users may
|
(seeding may be affected in rare corner cases). At the same time, users may
|
||||||
get a primary alignment to ALT contigs (no 0x800 flag) if there are no good
|
get a primary alignment to ALT contigs (no 0x800 flag) if there are no good
|
||||||
hits to the primary assembly, or get a supplementary alignment to ALT contigs
|
hits to the primary assembly, or get a supplementary alignment to ALT contigs
|
||||||
if it is better than hits to the primary assembly. Since this release, it is
|
if it is better than hits to the primary assembly. In the latter case, the
|
||||||
recommended to include ALT contigs.
|
`pa:f` tag shows the ratio of the primary hit score to the best ALT hit score,
|
||||||
|
which is no greater than 1. Since this release, it is recommended to include
|
||||||
|
ALT contigs.
|
||||||
|
|
||||||
Users may consider to use ALT contigs from GRCh38. I am also constructing a
|
Users may consider to use ALT contigs from GRCh38. I am also constructing a
|
||||||
non-redundant and more complete set of sequences missing from GRCh38.
|
non-redundant and more complete set of sequences missing from GRCh38.
|
||||||
|
|
|
||||||
7
bwamem.c
7
bwamem.c
|
|
@ -880,13 +880,13 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
|
||||||
for (i = 0; i < n; ++i)
|
for (i = 0; i < n; ++i)
|
||||||
if (i != which && !(list[i].flag&0x100)) break;
|
if (i != which && !(list[i].flag&0x100)) break;
|
||||||
if (i < n) { // there are other primary hits; output them
|
if (i < n) { // there are other primary hits; output them
|
||||||
int has_pri_alt = 0;
|
int pri_alt_sc = -1;
|
||||||
kputsn("\tSA:Z:", 6, str);
|
kputsn("\tSA:Z:", 6, str);
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
const mem_aln_t *r = &list[i];
|
const mem_aln_t *r = &list[i];
|
||||||
int k;
|
int k;
|
||||||
if (i == which || (list[i].flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
|
if (i == which || (list[i].flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
|
||||||
if (list[i].is_alt) has_pri_alt = 1;
|
if (list[i].is_alt) pri_alt_sc = pri_alt_sc > r->score? pri_alt_sc : r->score;
|
||||||
kputs(bns->anns[r->rid].name, str); kputc(',', str);
|
kputs(bns->anns[r->rid].name, str); kputc(',', str);
|
||||||
kputl(r->pos+1, str); kputc(',', str);
|
kputl(r->pos+1, str); kputc(',', str);
|
||||||
kputc("+-"[r->is_rev], str); kputc(',', str);
|
kputc("+-"[r->is_rev], str); kputc(',', str);
|
||||||
|
|
@ -897,7 +897,8 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
|
||||||
kputc(',', str); kputw(r->NM, str);
|
kputc(',', str); kputw(r->NM, str);
|
||||||
kputc(';', str);
|
kputc(';', str);
|
||||||
}
|
}
|
||||||
if (has_pri_alt) kputsn("\tpa:A:Y", 7, str);
|
if (pri_alt_sc > 0)
|
||||||
|
ksprintf(str, "\tpa:f:%.3f", (double)p->score / pri_alt_sc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
|
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
|
||||||
|
|
|
||||||
|
|
@ -59,19 +59,13 @@ void smem_config(smem_i *itr, int min_intv, int max_len, uint64_t max_intv)
|
||||||
|
|
||||||
const bwtintv_v *smem_next(smem_i *itr)
|
const bwtintv_v *smem_next(smem_i *itr)
|
||||||
{
|
{
|
||||||
int i, max, max_i, ori_start;
|
int ori_start;
|
||||||
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0;
|
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0;
|
||||||
if (itr->start >= itr->len || itr->start < 0) return 0;
|
if (itr->start >= itr->len || itr->start < 0) return 0;
|
||||||
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
|
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
|
||||||
if (itr->start == itr->len) return 0;
|
if (itr->start == itr->len) return 0;
|
||||||
ori_start = itr->start;
|
ori_start = itr->start;
|
||||||
itr->start = bwt_smem1a(itr->bwt, itr->len, itr->query, ori_start, itr->min_intv, itr->max_len, itr->max_intv, itr->matches, itr->tmpvec); // search for SMEM
|
itr->start = bwt_smem1a(itr->bwt, itr->len, itr->query, ori_start, itr->min_intv, itr->max_len, itr->max_intv, itr->matches, itr->tmpvec); // search for SMEM
|
||||||
if (itr->matches->n == 0) return itr->matches; // well, in theory, we should never come here
|
|
||||||
for (i = max = 0, max_i = 0; i < itr->matches->n; ++i) { // look for the longest match
|
|
||||||
bwtintv_t *p = &itr->matches->a[i];
|
|
||||||
int len = (uint32_t)p->info - (p->info>>32);
|
|
||||||
if (max < len) max = len, max_i = i;
|
|
||||||
}
|
|
||||||
return itr->matches;
|
return itr->matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
2
main.c
2
main.c
|
|
@ -4,7 +4,7 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#ifndef PACKAGE_VERSION
|
#ifndef PACKAGE_VERSION
|
||||||
#define PACKAGE_VERSION "0.7.10-r848-dirty"
|
#define PACKAGE_VERSION "0.7.10-r849-dirty"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int bwa_fa2pac(int argc, char *argv[]);
|
int bwa_fa2pac(int argc, char *argv[]);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue