finished the first draft of manpage
This commit is contained in:
parent
a9f089f0aa
commit
2338e887d9
2
chain.c
2
chain.c
|
|
@ -50,7 +50,7 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
|
|||
dd = dr > dq? dr - dq : dq - dr;
|
||||
if (dd > bw) continue;
|
||||
sc = dq > q_span && dr > q_span? q_span : dq < dr? dq : dr;
|
||||
sc = f[j] + sc - (dd? ilog2_32(dd) : 0);
|
||||
sc = f[j] + sc - (dd? ilog2_32(dd) : 0); // TODO: consider to also penalize the shortest distance
|
||||
if (sc > max_f) max_f = sc, max_j = j;
|
||||
}
|
||||
if (max_j >= 0) f[i] = max_f, p[i] = max_j;
|
||||
|
|
|
|||
8
main.c
8
main.c
|
|
@ -135,14 +135,14 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " -d FILE dump index to FILE []\n");
|
||||
fprintf(stderr, " Mapping:\n");
|
||||
fprintf(stderr, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
||||
fprintf(stderr, " -g INT split a mapping if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
||||
fprintf(stderr, " -r INT bandwidth [%d]\n", opt.bw);
|
||||
fprintf(stderr, " -n INT minimal number of minimizers [%d]\n", opt.min_cnt);
|
||||
fprintf(stderr, " -g INT stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
||||
fprintf(stderr, " -r INT bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
|
||||
fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
|
||||
fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
||||
// fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
|
||||
fprintf(stderr, " -S skip self and dual mappings (for the all-vs-all mode)\n");
|
||||
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
||||
fprintf(stderr, " -D FLOAT min fraction of seed matches [%g]\n", opt.min_seedcov_ratio);
|
||||
fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio);
|
||||
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
|
||||
fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
|
||||
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
||||
|
|
|
|||
151
minimap2.1
151
minimap2.1
|
|
@ -8,6 +8,13 @@ minimap2 - mapping and alignment between collections of DNA sequences
|
|||
* Indexing the target sequences (optional):
|
||||
.RS 4
|
||||
minimap2
|
||||
.RB [ -x
|
||||
.IR preset ]
|
||||
.B -d
|
||||
.I target.mmi
|
||||
.I target.fa
|
||||
.br
|
||||
minimap2
|
||||
.RB [ -H ]
|
||||
.RB [ -k
|
||||
.IR kmer ]
|
||||
|
|
@ -74,18 +81,15 @@ SAM format.
|
|||
.TP 10
|
||||
.BI -k \ INT
|
||||
Minimizer k-mer length [17]
|
||||
|
||||
.TP
|
||||
.BI -w \ INT
|
||||
Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
|
||||
in a window of w consecutive k-mers.
|
||||
|
||||
.TP
|
||||
.B -H
|
||||
Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
|
||||
contracting homopolymer runs to a single base. An HPC minimizer is a minimizer
|
||||
on the HPC sequence.
|
||||
|
||||
.TP
|
||||
.BI -I \ NUM
|
||||
Load at most
|
||||
|
|
@ -100,7 +104,6 @@ multiple times to map it against each batch of target sequences.
|
|||
.I NUM
|
||||
may end with k/K/m/M/g/G. NB: mapping quality is incorrect given a
|
||||
multi-part index.
|
||||
|
||||
.TP
|
||||
.BI -d \ FILE
|
||||
Save the minimizer index of
|
||||
|
|
@ -116,8 +119,125 @@ to
|
|||
Ignore top
|
||||
.I FLOAT
|
||||
fraction of most frequent minimizers [0.0002]
|
||||
.TP
|
||||
.BI -g \ INT
|
||||
Stop chain elongation if there are no minimizers in
|
||||
.IR INT -bp
|
||||
[10000].
|
||||
.TP
|
||||
.BI -r \ INT
|
||||
Bandwidth used in chaining and DP-based alignment [1000]. This option
|
||||
approximately controls the maximum gap size.
|
||||
.TP
|
||||
.BI -n \ INT
|
||||
Discard chains consisting of
|
||||
.RI < INT
|
||||
number of minimizers [3]
|
||||
.TP
|
||||
.BI -m \ INT
|
||||
Discard chains with chaining score
|
||||
.RI < INT
|
||||
[40]. Chaining score equals the approximate number of matching bases (exact if
|
||||
not using
|
||||
.BR -H )
|
||||
minus base-2 logarithm gap penalty. It is computed with dynamic programming.
|
||||
.TP
|
||||
.B -S
|
||||
Perform all-vs-all mapping. In this mode, if the query sequence name is
|
||||
lexicographically larger than the target sequence name, the hits between them
|
||||
will be suppressed; if the query sequence name is the same as the target name,
|
||||
diagonal minimizer hits will also be suppressed.
|
||||
.TP
|
||||
.BI -p \ FLOAT
|
||||
Minimal secondary-to-primary score ratio to output secondary mappings [2].
|
||||
Between two chains overlapping over half of the shorter chain (controlled by
|
||||
.BR --mask-level ),
|
||||
the chain with a lower score is secondary to the chain with a higher score.
|
||||
If the ratio of the scores is below
|
||||
.IR FLOAT ,
|
||||
the secondary chain will not be output or extended with DP alignment later.
|
||||
The default value suppresses all secondary chains.
|
||||
.TP
|
||||
.BI -D \ FLOAT
|
||||
Discard a chain if the fraction of matching bases over the length of
|
||||
query/target sequences in the chain is
|
||||
.RI < FLOAT
|
||||
[0].
|
||||
.TP
|
||||
.BI -x \ STR
|
||||
Preset []. This option applies multiple options at the same time. It should be
|
||||
applied before other options because options applied later will overwrite the
|
||||
values set by
|
||||
.BR -x .
|
||||
Available
|
||||
.I STR
|
||||
are:
|
||||
.RS
|
||||
.TP 8
|
||||
.B ava10k
|
||||
PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -Sw5 -p0 -m100 -D.05)
|
||||
.TP
|
||||
.B map10k
|
||||
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
|
||||
.TP
|
||||
.B asm1m
|
||||
Long assembly to reference mapping (-k19 -w19)
|
||||
.RE
|
||||
|
||||
.SS Alignment options
|
||||
|
||||
.TP 10
|
||||
.BI -A \ INT
|
||||
Matching score [1]
|
||||
.TP
|
||||
.BI -B \ INT
|
||||
Mismatching penalty [2]
|
||||
.TP
|
||||
.BI -O \ INT
|
||||
Gap open penalty [2]
|
||||
.TP
|
||||
.BI -E \ INT
|
||||
Gap extension penalty [1]. A gap of length
|
||||
.I l
|
||||
costs
|
||||
.RI {-O}+{-E}* l .
|
||||
.TP
|
||||
.BI -z \ INT
|
||||
Break an alignment if the running score drops too quickly along the diagonal of
|
||||
the DP matrix (diagonal X-drop, or Z-drop) [200]. Increasing the value improves
|
||||
the contiguity of the alignment at the cost of poor alignment in the middle
|
||||
(e.g. caused by a long inversion).
|
||||
.TP
|
||||
.BI -s \ INT
|
||||
Minimal peak DP alignment score to output [40]. The peak score is computed from
|
||||
the final CIGAR. It is the score of the max scoring segment in the alignment
|
||||
and may be different from the total alignment score.
|
||||
|
||||
.SS Input/output options
|
||||
|
||||
.TP 10
|
||||
.B -b
|
||||
Generate CIGAR and output alignments in the SAM format. Minimap2 outputs in PAF
|
||||
by default.
|
||||
.TP
|
||||
.B -c
|
||||
Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag.
|
||||
.TP
|
||||
.BI -t \ INT
|
||||
Number of threads [3]. Minimap2 uses at most three threads when collecting
|
||||
minimizers on target sequences, and uses up to
|
||||
.IR INT +1
|
||||
threads when mapping (the extra thread is for I/O, which is frequently idle and
|
||||
takes little CPU time).
|
||||
.TP
|
||||
.B -V
|
||||
Print version number to stdout
|
||||
|
||||
.SH OUTPUT FORMAT
|
||||
.PP
|
||||
Minimap2 outputs mapping positions in the Pairwise mApping Format (PAF) by
|
||||
default. PAF is a TAB-delimited text format with each line consisting of at
|
||||
least 12 fields as described in the following table:
|
||||
|
||||
.TS
|
||||
center box;
|
||||
|
|
@ -139,6 +259,29 @@ _
|
|||
12 int Mapping quality (0-255 with 255 for missing)
|
||||
.TE
|
||||
|
||||
.PP
|
||||
When alignment is available, column 11 gives the total number of sequence
|
||||
matches, mismatches and gaps in the alignment; column 10 divided by column 11
|
||||
gives the BLAST-like alignment identity. When alignment is unavailable,
|
||||
these two columns are approximate. PAF may optionally have additional fields in
|
||||
the SAM-like typed key-value format. Minimap2 may output the following tags:
|
||||
|
||||
.TS
|
||||
center box;
|
||||
cb | cb | cb
|
||||
r | c | l .
|
||||
Tag Type Description
|
||||
_
|
||||
cm i Number of minimizers on the chain
|
||||
s1 i Chaining score
|
||||
s2 i Chaining score of the best secondary chain
|
||||
NM i Total number of mismatches and gaps in the alignment
|
||||
AS i DP alignment score
|
||||
ms i DP score of the max scoring segment in the alignment
|
||||
nn i Number of ambiguous bases in the alignment
|
||||
cg Z CIGAR string (only in PAF)
|
||||
.TE
|
||||
|
||||
.SH SEE ALSO
|
||||
.PP
|
||||
miniasm(1), minimap(1), bwa(1).
|
||||
|
|
|
|||
Loading…
Reference in New Issue