diff --git a/main.c b/main.c index c0e811d..1a9efd8 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.3-r538-dirty" +#define MM_VERSION "2.3-r539-dirty" #ifdef __linux__ #include diff --git a/map.c b/map.c index f99626d..e78975e 100644 --- a/map.c +++ b/map.c @@ -105,7 +105,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mo->mini_batch_size = 50000000; } else if (strcmp(preset, "splice") == 0 || strcmp(preset, "cdna") == 0) { io->is_hpc = 0, io->k = 15, io->w = 5; - mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV; + mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK; mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000; mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0; mo->noncan = 9; diff --git a/minimap2.1 b/minimap2.1 index b7d7177..97e1790 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -220,7 +220,9 @@ costs In the splice mode, the second gap penalties are not used. .TP .BI -C \ INT -Cost for a non-canonical GT-AG splicing [0] +Cost for a non-canonical GT-AG splicing (effective with +.BR --splice ) +[0] .TP .BI -z \ INT Break an alignment if the running score drops too quickly along the diagonal of @@ -243,7 +245,25 @@ both strands; no attempt to match GT-AG [n] .TP .BI --end-bonus \ INT -Score bonus when alignment extends to the end of the query sequence [10]. +Score bonus when alignment extends to the end of the query sequence [0]. +.TP +.BR --splice-flank [= yes | no ] +Assume the next base to a +.B GT +donor site tends to be A/G (91% in human and 92% in mouse) and the preceding +base to an +.B AG +acceptor tends to be C/T [yes with +.BR --splice ]. +This trend is evolutionarily conserved, all the way to S. cerevisiae +(PMID:18688272). Specifying this option generally leads to higher junction +accuracy by several percent, so it is applied by default with +.BR --splice . 
+However, the SIRV control does not honor this trend +(only ~60%). On such data, this option reduces accuracy. If you are benchmarking minimap2 +on SIRV data, please add +.B --splice-flank=no +to the command line. .SS Input/output options .TP 10 .B -a @@ -261,7 +281,7 @@ the real CIGAR in memory. .TP .BI -R \ STR SAM read group line in a format like -.RB @RG\\\\tID:foo\\\\tSM:bar +.B @RG\\\\tID:foo\\\\tSM:bar []. .TP .B -c @@ -371,8 +391,8 @@ is that this preset is not using HPC minimizers. .B splice Long-read spliced alignment .RB ( -k15 -.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -.BR -ub ). +.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub +.BR --splice-flank=yes ). In the splice mode, 1) long deletions are taken as introns and represented as the .RB ` N '