r296: expose splicing related options to CLI
This commit is contained in:
parent
28f86688ab
commit
b5f5929bf9
15
align.c
15
align.c
|
|
@ -136,7 +136,7 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
|
|||
for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); fputc('\n', stderr);
|
||||
}
|
||||
if (opt->flag & MM_F_SPLICE)
|
||||
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, opt->zdrop, flag|KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV, ez);
|
||||
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, opt->zdrop, flag, ez);
|
||||
else if (opt->q == opt->q2 && opt->e == opt->e2)
|
||||
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, opt->zdrop, flag, ez);
|
||||
else
|
||||
|
|
@ -249,7 +249,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
{
|
||||
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
|
||||
uint8_t *tseq, *qseq;
|
||||
int32_t i, l, bw, dropped = 0, rs0, re0, qs0, qe0;
|
||||
int32_t i, l, bw, dropped = 0, extra_flag = 0, rs0, re0, qs0, qe0;
|
||||
int32_t rs, re, qs, qe;
|
||||
int32_t rs1, qs1, re1, qe1;
|
||||
int8_t mat[25];
|
||||
|
|
@ -266,6 +266,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs);
|
||||
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
|
||||
|
||||
if (opt->flag & MM_F_SPLICE) {
|
||||
if (opt->flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
|
||||
if (opt->flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
|
||||
}
|
||||
|
||||
// compute rs0 and qs0
|
||||
if (r->split && as1 > 0) {
|
||||
mm_adjust_minier(mi, qseq0, &a[as1-1], &rs0, &qs0);
|
||||
|
|
@ -298,7 +303,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
mm_idx_getseq(mi, rid, rs0, rs, tseq);
|
||||
mm_seq_rev(qs - qs0, qseq);
|
||||
mm_seq_rev(rs - rs0, tseq);
|
||||
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, mat, bw, KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
|
||||
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, mat, bw, extra_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
|
||||
if (ez->n_cigar > 0) {
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
r->p->dp_score += ez->max;
|
||||
|
|
@ -320,7 +325,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
bw1 = qe - qs > re - rs? qe - qs : re - rs;
|
||||
qseq = &qseq0[rev][qs];
|
||||
mm_idx_getseq(mi, rid, rs, re, tseq);
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, KSW_EZ_APPROX_MAX, ez);
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, extra_flag|KSW_EZ_APPROX_MAX, ez);
|
||||
if (mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop))
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, 0, ez);
|
||||
if (ez->n_cigar > 0)
|
||||
|
|
@ -345,7 +350,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
if (!dropped && qe < qe0 && re < re0) { // right extension
|
||||
qseq = &qseq0[rev][qe];
|
||||
mm_idx_getseq(mi, rid, re, re0, tseq);
|
||||
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, mat, bw, KSW_EZ_EXTZ_ONLY, ez);
|
||||
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, mat, bw, extra_flag|KSW_EZ_EXTZ_ONLY, ez);
|
||||
if (ez->n_cigar > 0) {
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
r->p->dp_score += ez->max;
|
||||
|
|
|
|||
20
main.c
20
main.c
|
|
@ -8,7 +8,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r295-dirty"
|
||||
#define MM_VERSION "2.0-r296-dirty"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
@ -32,6 +32,7 @@ static struct option long_options[] = {
|
|||
{ "min-dp-len", required_argument, 0, 0 },
|
||||
{ "print-aln-seq", no_argument, 0, 0 },
|
||||
{ "splice", no_argument, 0, 0 },
|
||||
{ "cost-non-gt-ag", required_argument, 0, 0 },
|
||||
{ "max-intron-len", required_argument, 0, 'G' },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
{ "min-count", required_argument, 0, 'n' },
|
||||
|
|
@ -67,7 +68,7 @@ int main(int argc, char *argv[])
|
|||
mm_realtime0 = realtime();
|
||||
mm_mapopt_init(&opt);
|
||||
|
||||
while ((c = getopt_long(argc, argv, "aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Q", long_options, &long_idx)) >= 0) {
|
||||
while ((c = getopt_long(argc, argv, "aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:", long_options, &long_idx)) >= 0) {
|
||||
if (c == 'w') w = atoi(optarg), idx_par_set = 1;
|
||||
else if (c == 'k') k = atoi(optarg), idx_par_set = 1;
|
||||
else if (c == 'H') is_hpc = 1, idx_par_set = 1;
|
||||
|
|
@ -105,9 +106,19 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len
|
||||
else if (c == 0 && long_idx == 9) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ; // --print-aln-seq
|
||||
else if (c == 0 && long_idx ==10) opt.flag |= MM_F_SPLICE; // --splice
|
||||
else if (c == 0 && long_idx ==11) opt.noncan = atoi(optarg); // --cost-non-gt-ag
|
||||
else if (c == 'V') {
|
||||
puts(MM_VERSION);
|
||||
return 0;
|
||||
} else if (c == 'u') {
|
||||
if (*optarg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV;
|
||||
else if (*optarg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV;
|
||||
else if (*optarg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR;
|
||||
else if (*optarg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV);
|
||||
else {
|
||||
fprintf(stderr, "[E::%s] unrecognized cDNA direction\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
} else if (c == 'O') {
|
||||
opt.q = opt.q2 = strtol(optarg, &s, 10);
|
||||
if (*s == ',') opt.q2 = strtol(s + 1, &s, 10);
|
||||
|
|
@ -139,7 +150,7 @@ int main(int argc, char *argv[])
|
|||
opt.min_dp_max = 200;
|
||||
} else if (strcmp(optarg, "splice") == 0 || strcmp(optarg, "cdna") == 0) {
|
||||
k = 15, w = 5;
|
||||
opt.flag |= MM_F_SPLICE;
|
||||
opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV;
|
||||
opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 200000;
|
||||
opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 32, opt.e2 = 0;
|
||||
opt.noncan = 4;
|
||||
|
|
@ -181,6 +192,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " -E INT[,INT] gap extension penalty; a k-long gap costs min{O1+k*E1,O2+k*E2} [%d,%d]\n", opt.e, opt.e2);
|
||||
fprintf(stderr, " -z INT Z-drop score [%d]\n", opt.zdrop);
|
||||
fprintf(stderr, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
|
||||
fprintf(stderr, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
|
||||
fprintf(stderr, " Input/Output:\n");
|
||||
fprintf(stderr, " -Q ignore base quality in the input\n");
|
||||
fprintf(stderr, " -a output in the SAM format (PAF by default)\n");
|
||||
|
|
@ -198,7 +210,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
|
||||
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
|
||||
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
|
||||
fprintf(stderr, " splice: -k15 -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -z200 (long-read spliced aln)\n");
|
||||
fprintf(stderr, " splice: long-read spliced alignment (see minimap2.1 for details)\n");
|
||||
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
20
minimap.h
20
minimap.h
|
|
@ -5,16 +5,18 @@
|
|||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define MM_IDX_DEF_B 14
|
||||
#define MM_IDX_DEF_B 14
|
||||
|
||||
#define MM_F_NO_SELF 0x01
|
||||
#define MM_F_AVA 0x02
|
||||
#define MM_F_CIGAR 0x04
|
||||
#define MM_F_OUT_SAM 0x08
|
||||
#define MM_F_NO_QUAL 0x10
|
||||
#define MM_F_OUT_CG 0x20
|
||||
#define MM_F_OUT_CS 0x40
|
||||
#define MM_F_SPLICE 0x80
|
||||
#define MM_F_NO_SELF 0x001
|
||||
#define MM_F_AVA 0x002
|
||||
#define MM_F_CIGAR 0x004
|
||||
#define MM_F_OUT_SAM 0x008
|
||||
#define MM_F_NO_QUAL 0x010
|
||||
#define MM_F_OUT_CG 0x020
|
||||
#define MM_F_OUT_CS 0x040
|
||||
#define MM_F_SPLICE 0x080
|
||||
#define MM_F_SPLICE_FOR 0x100
|
||||
#define MM_F_SPLICE_REV 0x200
|
||||
|
||||
#define MM_IDX_MAGIC "MMI\2"
|
||||
|
||||
|
|
|
|||
27
minimap2.1
27
minimap2.1
|
|
@ -1,4 +1,4 @@
|
|||
.TH minimap2 1 "12 August 2017" "minimap2-2.0-r290-dirty" "Bioinformatics tools"
|
||||
.TH minimap2 1 "13 August 2017" "minimap2-2.0-r296-dirty" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
minimap2 - mapping and alignment between collections of DNA sequences
|
||||
|
|
@ -205,6 +205,18 @@ the contiguity of the alignment at the cost of poor alignment in the middle
|
|||
Minimal peak DP alignment score to output [40]. The peak score is computed from
|
||||
the final CIGAR. It is the score of the max scoring segment in the alignment
|
||||
and may be different from the total alignment score.
|
||||
.TP
|
||||
.BI -u \ CHAR
|
||||
How to find canonical splicing sites GT-AG -
|
||||
.BR f :
|
||||
transcript strand;
|
||||
.BR b :
|
||||
both strands;
|
||||
.BR n :
|
||||
no attempt to match GT-AG [n]
|
||||
.TP
|
||||
.BI --cost-non-gt-ag \ INT
|
||||
Cost of non-canonical splicing sites [0].
|
||||
.SS Input/output options
|
||||
.TP 10
|
||||
.B -Q
|
||||
|
|
@ -302,11 +314,10 @@ is that this preset is not using HPC minimizers.
|
|||
.B splice
|
||||
Long-read spliced alignment
|
||||
.RB ( -k15
|
||||
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0
|
||||
.BR -z200 ).
|
||||
As of now, minimap2 only finds approximate exon boundaries. The true boundaries
|
||||
are usually within 10bp around the reported positions. In the splice mode,
|
||||
1) long deletions are taken as introns and represented as the
|
||||
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -z200 -ub --cost-non-gt-ag
|
||||
.BR 4 ).
|
||||
In the splice mode, 1) long deletions are taken as introns and represented as
|
||||
the
|
||||
.RB ` N '
|
||||
CIGAR operator; 2) long insertions are disabled; 3) deletion and insertion gap
|
||||
costs are different during chaining; 4) the computation of the
|
||||
|
|
@ -387,10 +398,6 @@ Minimap2 does not work well with Illumina short reads as of now.
|
|||
*
|
||||
Minimap2 requires SSE2 instructions to compile. It is possible to add
|
||||
non-SSE2 support, but it would make minimap2 slower by several times.
|
||||
.TP
|
||||
*
|
||||
In the splice mode, minimap2 is unable to find the precise exon boundaries.
|
||||
The true bounraries are usually within 10bp around the reported locations.
|
||||
.SH SEE ALSO
|
||||
.PP
|
||||
miniasm(1), minimap(1), bwa(1).
|
||||
|
|
|
|||
Loading…
Reference in New Issue