diff --git a/main.c b/main.c index 086896c..9088a29 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.2-r474-dirty" +#define MM_VERSION "2.2-r475-dirty" #ifdef __linux__ #include @@ -143,6 +143,8 @@ int main(int argc, char *argv[]) opt.flag &= ~MM_F_OUT_CS_LONG; } else if (strcmp(optarg, "long") == 0) { opt.flag |= MM_F_OUT_CS_LONG; + } else if (strcmp(optarg, "none") == 0) { + opt.flag &= ~MM_F_OUT_CS; } else if (mm_verbose >= 2) { fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n"); } @@ -222,7 +224,7 @@ int main(int argc, char *argv[]) fprintf(fp_help, " -Q don't output base quality in SAM\n"); fprintf(fp_help, " -R STR SAM read group line in a format like '@RG\\tID:foo\\tSM:bar' []\n"); fprintf(fp_help, " -c output CIGAR in PAF\n"); - fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [no cs]\n"); + fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n"); fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads); fprintf(fp_help, " -K NUM minibatch size for mapping [200M]\n"); // fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose); diff --git a/minimap2.1 b/minimap2.1 index 6ab8112..4cea3f7 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -234,6 +234,21 @@ SAM read group line in a format like .B -c Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag. .TP +.BI --cs[= STR ] +Output the +.B cs +tag. +.I STR +can be either +.I short +or +.IR long . +If no +.I STR +is given, +.I short +is assumed. [none] +.TP .BI -t \ INT Number of threads [3]. Minimap2 uses at most three threads when indexing target sequences, and uses up to @@ -332,7 +347,8 @@ tag ignores introns to demote hits to pseudogenes. .B sr Short single-end reads without splicing .RB ( -k21 -.B -w11 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000 -n2 -m20 -s40 -g100 -K50m +.B -w11 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200 +.B -K50m --multi .BR --sr ). .RE .SS Miscellaneous options @@ -395,6 +411,27 @@ ms i DP score of the max scoring segment in the alignment nn i Number of ambiguous bases in the alignment ts A Transcript strand (splice mode only) cg Z CIGAR string (only in PAF) +cs Z Difference string +.TE + +.PP +The +.B cs +tag encodes difference sequences in the short form or the entire query +.I AND +reference sequences in the long form. It consists of a series of operations: +.TS +center box; +cb | cb |cb +r | l | l . +Op Regex Description +_ + = [ACGTN]+ Identical sequence (long form) + : [0-9]+ Identical sequence length + * [acgtn][acgtn] Substitution: ref to query + + [acgtn]+ Insertion to the reference + - [acgtn]+ Deletion from the reference + ~ [acgtn]{2}[0-9]+[acgtn]{2} Intron length and splice signal .TE .SH LIMITATIONS @@ -405,9 +442,6 @@ where seed positions may be suboptimal. This should not be a big concern because even the optimal alignment may be wrong in such regions. .TP * -Minimap2 does not work well with Illumina short reads as of now. -.TP -* Minimap2 requires SSE2 instructions to compile. It is possible to add non-SSE2 support, but it would make minimap2 slower by several times. .SH SEE ALSO