r849: the pa tag now gives a number

... which is the ratio of this hit to the best ALT hit.
This commit is contained in:
Heng Li 2014-09-17 13:05:35 -04:00
parent 6f37c14f26
commit 825ae92e58
4 changed files with 10 additions and 13 deletions

View File

@ -12,8 +12,10 @@ having ALT contigs almost has not effect on alignments to the primary assembly
(seeding may be affected in rare corner cases). At the same time, users may
get a primary alignment to ALT contigs (no 0x800 flag) if there are no good
hits to the primary assembly, or get a supplementary alignment to ALT contigs
if it is better than hits to the primary assembly. Since this release, it is
recommended to include ALT contigs.
if it is better than hits to the primary assembly. In the latter case, the
`pa:f` tag shows the ratio of the primary hit score to the best ALT hit score,
which is no greater than 1. Since this release, it is recommended to include
ALT contigs.
Users may consider to use ALT contigs from GRCh38. I am also constructing a
non-redundant and more complete set of sequences missing from GRCh38.

View File

@ -880,13 +880,13 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
for (i = 0; i < n; ++i)
if (i != which && !(list[i].flag&0x100)) break;
if (i < n) { // there are other primary hits; output them
int has_pri_alt = 0;
int pri_alt_sc = -1;
kputsn("\tSA:Z:", 6, str);
for (i = 0; i < n; ++i) {
const mem_aln_t *r = &list[i];
int k;
if (i == which || (list[i].flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
if (list[i].is_alt) has_pri_alt = 1;
if (list[i].is_alt) pri_alt_sc = pri_alt_sc > r->score? pri_alt_sc : r->score;
kputs(bns->anns[r->rid].name, str); kputc(',', str);
kputl(r->pos+1, str); kputc(',', str);
kputc("+-"[r->is_rev], str); kputc(',', str);
@ -897,7 +897,8 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
kputc(',', str); kputw(r->NM, str);
kputc(';', str);
}
if (has_pri_alt) kputsn("\tpa:A:Y", 7, str);
if (pri_alt_sc > 0)
ksprintf(str, "\tpa:f:%.3f", (double)p->score / pri_alt_sc);
}
}
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }

View File

@ -59,19 +59,13 @@ void smem_config(smem_i *itr, int min_intv, int max_len, uint64_t max_intv)
const bwtintv_v *smem_next(smem_i *itr)
{
int i, max, max_i, ori_start;
int ori_start;
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0;
if (itr->start >= itr->len || itr->start < 0) return 0;
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
if (itr->start == itr->len) return 0;
ori_start = itr->start;
itr->start = bwt_smem1a(itr->bwt, itr->len, itr->query, ori_start, itr->min_intv, itr->max_len, itr->max_intv, itr->matches, itr->tmpvec); // search for SMEM
if (itr->matches->n == 0) return itr->matches; // well, in theory, we should never come here
for (i = max = 0, max_i = 0; i < itr->matches->n; ++i) { // look for the longest match
bwtintv_t *p = &itr->matches->a[i];
int len = (uint32_t)p->info - (p->info>>32);
if (max < len) max = len, max_i = i;
}
return itr->matches;
}

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r848-dirty"
#define PACKAGE_VERSION "0.7.10-r849-dirty"
#endif
int bwa_fa2pac(int argc, char *argv[]);