r475: added --cs=none; updated manpage

This commit is contained in:
Heng Li 2017-10-05 15:27:37 -04:00
parent f4a5d3a692
commit b839758335
2 changed files with 42 additions and 6 deletions

6
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.2-r474-dirty"
#define MM_VERSION "2.2-r475-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -143,6 +143,8 @@ int main(int argc, char *argv[])
opt.flag &= ~MM_F_OUT_CS_LONG;
} else if (strcmp(optarg, "long") == 0) {
opt.flag |= MM_F_OUT_CS_LONG;
} else if (strcmp(optarg, "none") == 0) {
opt.flag &= ~MM_F_OUT_CS;
} else if (mm_verbose >= 2) {
fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n");
}
@ -222,7 +224,7 @@ int main(int argc, char *argv[])
fprintf(fp_help, " -Q don't output base quality in SAM\n");
fprintf(fp_help, " -R STR SAM read group line in a format like '@RG\\tID:foo\\tSM:bar' []\n");
fprintf(fp_help, " -c output CIGAR in PAF\n");
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [no cs]\n");
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
fprintf(fp_help, " -K NUM minibatch size for mapping [200M]\n");
// fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);

View File

@ -234,6 +234,21 @@ SAM read group line in a format like
.B -c
Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag.
.TP
.BI --cs[= STR ]
Output the
.B cs
tag.
.I STR
can be either
.I short
or
.IR long .
If no
.I STR
is given,
.I short
is assumed. [none]
.TP
.BI -t \ INT
Number of threads [3]. Minimap2 uses at most three threads when indexing target
sequences, and uses up to
@ -332,7 +347,8 @@ tag ignores introns to demote hits to pseudogenes.
.B sr
Short single-end reads without splicing
.RB ( -k21
.B -w11 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000 -n2 -m20 -s40 -g100 -K50m
.B -w11 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200
.B -K50m --multi
.BR --sr ).
.RE
.SS Miscellaneous options
@ -395,6 +411,27 @@ ms i DP score of the max scoring segment in the alignment
nn i Number of ambiguous bases in the alignment
ts A Transcript strand (splice mode only)
cg Z CIGAR string (only in PAF)
cs Z Difference string
.TE
.PP
The
.B cs
tag encodes difference sequences in the short form or the entire query
.I AND
reference sequences in the long form. It consists of a series of operations:
.TS
center box;
cb | cb |cb
r | l | l .
Op Regex Description
_
= [ACGTN]+ Identical sequence (long form)
: [0-9]+ Identical sequence length
* [acgtn][acgtn] Substitution: ref to query
+ [acgtn]+ Insertion to the reference
- [acgtn]+ Deletion from the reference
~ [acgtn]{2}[0-9]+[acgtn]{2} Intron length and splice signal
.TE
.SH LIMITATIONS
@ -405,9 +442,6 @@ where seed positions may be suboptimal. This should not be a big concern
because even the optimal alignment may be wrong in such regions.
.TP
*
Minimap2 does not work well with Illumina short reads as of now.
.TP
*
Minimap2 requires SSE2 instructions to compile. It is possible to add
non-SSE2 support, but doing so would make minimap2 several times slower.
.SH SEE ALSO