updated mappy and example.c

This commit is contained in:
Heng Li 2017-10-16 11:15:07 -04:00
parent adf6cd7f52
commit b24c9c90c7
6 changed files with 28 additions and 15 deletions

View File

@ -43,8 +43,7 @@ int main(int argc, char *argv[])
mm_reg1_t *r = &reg[j]; mm_reg1_t *r = &reg[j];
assert(r->p); // with MM_F_CIGAR, this should not be NULL assert(r->p); // with MM_F_CIGAR, this should not be NULL
printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]); printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]);
printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq);
r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen, r->mapq);
for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings! for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings!
printf("%d%c", r->p->cigar[i]>>4, "MIDSHN"[r->p->cigar[i]&0xf]); printf("%d%c", r->p->cigar[i]>>4, "MIDSHN"[r->p->cigar[i]&0xf]);
putchar('\n'); putchar('\n');

View File

@ -104,9 +104,13 @@ properties:
* **mapq**: mapping quality * **mapq**: mapping quality
* **NM**: number of mismatches and gaps in the alignment
* **blen**: length of the alignment, including both alignment matches and gaps * **blen**: length of the alignment, including both alignment matches and gaps
but excluding ambiguous bases.
* **mlen**: length of the matching bases in the alignment, excluding ambiguous
base matches.
* **NM**: number of mismatches, gaps and ambiguous poistions in the alignment
* **trans_strand**: transcript strand. +1 if on the forward strand; -1 if on the * **trans_strand**: transcript strand. +1 if on the forward strand; -1 if on the
reverse strand; 0 if unknown reverse strand; 0 if unknown
@ -124,7 +128,7 @@ the following format:
:: ::
q_st q_en strand ctg ctg_len r_st r_en blen-NM blen mapq cg:Z:cigar_str q_st q_en strand ctg ctg_len r_st r_en mlen blen mapq cg:Z:cigar_str
It is effectively the PAF format without the QueryName and QueryLength columns It is effectively the PAF format without the QueryName and QueryLength columns
(the first two columns in PAF). (the first two columns in PAF).

View File

@ -12,7 +12,7 @@ typedef struct {
const char *ctg; const char *ctg;
int32_t ctg_start, ctg_end; int32_t ctg_start, ctg_end;
int32_t qry_start, qry_end; int32_t qry_start, qry_end;
int32_t blen, NM, ctg_len; int32_t blen, mlen, NM, ctg_len;
uint8_t mapq, is_primary; uint8_t mapq, is_primary;
int8_t strand, trans_strand; int8_t strand, trans_strand;
int32_t n_cigar32; int32_t n_cigar32;
@ -27,8 +27,9 @@ static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
h->qry_start = r->qs, h->qry_end = r->qe; h->qry_start = r->qs, h->qry_end = r->qe;
h->strand = r->rev? -1 : 1; h->strand = r->rev? -1 : 1;
h->mapq = r->mapq; h->mapq = r->mapq;
h->blen = r->p->blen; h->mlen = r->mlen;
h->NM = r->p->n_diff; h->blen = r->blen;
h->NM = r->blen - r->mlen + r->p->n_ambi;
h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0; h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
h->is_primary = (r->id == r->parent); h->is_primary = (r->id == r->parent);
h->n_cigar32 = r->p->n_cigar; h->n_cigar32 = r->p->n_cigar;

View File

@ -15,17 +15,20 @@ cdef extern from "minimap.h":
int flag int flag
int bw int bw
int max_gap, max_gap_ref int max_gap, max_gap_ref
int max_frag_len
int max_chain_skip int max_chain_skip
int min_cnt int min_cnt
int min_chain_score int min_chain_score
float mask_level float mask_level
float pri_ratio float pri_ratio
int best_n int best_n
float min_iden
int max_join_long, max_join_short int max_join_long, max_join_short
int min_join_flank_sc int min_join_flank_sc
int a, b, q, e, q2, e2 int a, b, q, e, q2, e2
int noncan int noncan
int zdrop int zdrop
int end_bonus
int min_dp_max int min_dp_max
int min_ksw_len int min_ksw_len
int pe_ori, pe_bonus int pe_ori, pe_bonus
@ -86,7 +89,7 @@ cdef extern from "cmappy.h":
const char *ctg const char *ctg
int32_t ctg_start, ctg_end int32_t ctg_start, ctg_end
int32_t qry_start, qry_end int32_t qry_start, qry_end
int32_t blen, NM, ctg_len int32_t blen, mlen, NM, ctg_len
uint8_t mapq, is_primary uint8_t mapq, is_primary
int8_t strand, trans_strand int8_t strand, trans_strand
int32_t n_cigar32 int32_t n_cigar32

View File

@ -7,15 +7,15 @@ cmappy.mm_reset_timer()
cdef class Alignment: cdef class Alignment:
cdef int _ctg_len, _r_st, _r_en cdef int _ctg_len, _r_st, _r_en
cdef int _q_st, _q_en cdef int _q_st, _q_en
cdef int _NM, _blen cdef int _NM, _mlen, _blen
cdef int8_t _strand, _trans_strand cdef int8_t _strand, _trans_strand
cdef uint8_t _mapq, _is_primary cdef uint8_t _mapq, _is_primary
cdef _ctg, _cigar # these are python objects cdef _ctg, _cigar # these are python objects
def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, blen, NM, trans_strand): def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand):
self._ctg, self._ctg_len, self._r_st, self._r_en = str(ctg), cl, cs, ce self._ctg, self._ctg_len, self._r_st, self._r_en = str(ctg), cl, cs, ce
self._strand, self._q_st, self._q_en = strand, qs, qe self._strand, self._q_st, self._q_en = strand, qs, qe
self._NM, self._blen = NM, blen self._NM, self._mlen, self._blen = NM, mlen, blen
self._mapq = mapq self._mapq = mapq
self._cigar = cigar self._cigar = cigar
self._is_primary = is_primary self._is_primary = is_primary
@ -39,6 +39,12 @@ cdef class Alignment:
@property @property
def trans_strand(self): return self._trans_strand def trans_strand(self): return self._trans_strand
@property
def blen(self): return self._blen
@property
def mlen(self): return self._mlen
@property @property
def NM(self): return self._NM def NM(self): return self._NM
@ -71,7 +77,7 @@ cdef class Alignment:
elif self._trans_strand < 0: ts = 'ts:A:-' elif self._trans_strand < 0: ts = 'ts:A:-'
else: ts = 'ts:A:.' else: ts = 'ts:A:.'
return "\t".join([str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en), return "\t".join([str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
str(self._blen - self._NM), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str]) str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str])
cdef class ThreadBuffer: cdef class ThreadBuffer:
cdef cmappy.mm_tbuf_t *_b cdef cmappy.mm_tbuf_t *_b
@ -135,7 +141,7 @@ cdef class Aligner:
for k in range(h.n_cigar32): for k in range(h.n_cigar32):
c = h.cigar32[k] c = h.cigar32[k]
cigar.append([c>>4, c&0xf]) cigar.append([c>>4, c&0xf])
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.blen, h.NM, h.trans_strand) yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand)
cmappy.mm_free_reg1(&regs[i]) cmappy.mm_free_reg1(&regs[i])
free(regs) free(regs)

View File

@ -33,7 +33,7 @@ setup(
keywords = 'sequence-alignment', keywords = 'sequence-alignment',
scripts = ['python/minimap2.py'], scripts = ['python/minimap2.py'],
ext_modules = [Extension('mappy', ext_modules = [Extension('mappy',
sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'pe.c',
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c', 'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c'], 'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c'],
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h', depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',