finished the first draft of manpage
This commit is contained in:
parent
a9f089f0aa
commit
2338e887d9
2
chain.c
2
chain.c
|
|
@ -50,7 +50,7 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
|
||||||
dd = dr > dq? dr - dq : dq - dr;
|
dd = dr > dq? dr - dq : dq - dr;
|
||||||
if (dd > bw) continue;
|
if (dd > bw) continue;
|
||||||
sc = dq > q_span && dr > q_span? q_span : dq < dr? dq : dr;
|
sc = dq > q_span && dr > q_span? q_span : dq < dr? dq : dr;
|
||||||
sc = f[j] + sc - (dd? ilog2_32(dd) : 0);
|
sc = f[j] + sc - (dd? ilog2_32(dd) : 0); // TODO: consider to also penalize the shortest distance
|
||||||
if (sc > max_f) max_f = sc, max_j = j;
|
if (sc > max_f) max_f = sc, max_j = j;
|
||||||
}
|
}
|
||||||
if (max_j >= 0) f[i] = max_f, p[i] = max_j;
|
if (max_j >= 0) f[i] = max_f, p[i] = max_j;
|
||||||
|
|
|
||||||
8
main.c
8
main.c
|
|
@ -135,14 +135,14 @@ int main(int argc, char *argv[])
|
||||||
fprintf(stderr, " -d FILE dump index to FILE []\n");
|
fprintf(stderr, " -d FILE dump index to FILE []\n");
|
||||||
fprintf(stderr, " Mapping:\n");
|
fprintf(stderr, " Mapping:\n");
|
||||||
fprintf(stderr, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
fprintf(stderr, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
||||||
fprintf(stderr, " -g INT split a mapping if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
fprintf(stderr, " -g INT stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
||||||
fprintf(stderr, " -r INT bandwidth [%d]\n", opt.bw);
|
fprintf(stderr, " -r INT bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
|
||||||
fprintf(stderr, " -n INT minimal number of minimizers [%d]\n", opt.min_cnt);
|
fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
|
||||||
fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
||||||
// fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
|
// fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
|
||||||
fprintf(stderr, " -S skip self and dual mappings (for the all-vs-all mode)\n");
|
fprintf(stderr, " -S skip self and dual mappings (for the all-vs-all mode)\n");
|
||||||
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
||||||
fprintf(stderr, " -D FLOAT min fraction of seed matches [%g]\n", opt.min_seedcov_ratio);
|
fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio);
|
||||||
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
|
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
|
||||||
fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
|
fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
|
||||||
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
||||||
|
|
|
||||||
151
minimap2.1
151
minimap2.1
|
|
@ -8,6 +8,13 @@ minimap2 - mapping and alignment between collections of DNA sequences
|
||||||
* Indexing the target sequences (optional):
|
* Indexing the target sequences (optional):
|
||||||
.RS 4
|
.RS 4
|
||||||
minimap2
|
minimap2
|
||||||
|
.RB [ -x
|
||||||
|
.IR preset ]
|
||||||
|
.B -d
|
||||||
|
.I target.mmi
|
||||||
|
.I target.fa
|
||||||
|
.br
|
||||||
|
minimap2
|
||||||
.RB [ -H ]
|
.RB [ -H ]
|
||||||
.RB [ -k
|
.RB [ -k
|
||||||
.IR kmer ]
|
.IR kmer ]
|
||||||
|
|
@ -74,18 +81,15 @@ SAM format.
|
||||||
.TP 10
|
.TP 10
|
||||||
.BI -k \ INT
|
.BI -k \ INT
|
||||||
Minimizer k-mer length [17]
|
Minimizer k-mer length [17]
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.BI -w \ INT
|
.BI -w \ INT
|
||||||
Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
|
Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
|
||||||
in a window of w consecutive k-mers.
|
in a window of w consecutive k-mers.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B -H
|
.B -H
|
||||||
Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
|
Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
|
||||||
contracting homopolymer runs to a single base. An HPC minimizer is a minimizer
|
contracting homopolymer runs to a single base. An HPC minimizer is a minimizer
|
||||||
on the HPC sequence.
|
on the HPC sequence.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.BI -I \ NUM
|
.BI -I \ NUM
|
||||||
Load at most
|
Load at most
|
||||||
|
|
@ -100,7 +104,6 @@ multiple times to map it against each batch of target sequences.
|
||||||
.I NUM
|
.I NUM
|
||||||
may be ending with k/K/m/M/g/G. NB: mapping quality is incorrect given a
|
may be ending with k/K/m/M/g/G. NB: mapping quality is incorrect given a
|
||||||
multi-part index.
|
multi-part index.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.BI -d \ FILE
|
.BI -d \ FILE
|
||||||
Save the minimizer index of
|
Save the minimizer index of
|
||||||
|
|
@ -116,8 +119,125 @@ to
|
||||||
Ignore top
|
Ignore top
|
||||||
.I FLOAT
|
.I FLOAT
|
||||||
fraction of most frequent minimizers [0.0002]
|
fraction of most frequent minimizers [0.0002]
|
||||||
|
.TP
|
||||||
|
.BI -g \ INT
|
||||||
|
Stop chain elongation if there are no minimizers in
|
||||||
|
.IR INT -bp
|
||||||
|
[10000].
|
||||||
|
.TP
|
||||||
|
.BI -r \ INT
|
||||||
|
Bandwidth used in chaining and DP-based alignment [1000]. This option
|
||||||
|
approximately controls the maximum gap size.
|
||||||
|
.TP
|
||||||
|
.BI -n \ INT
|
||||||
|
Discard chains consisting of
|
||||||
|
.RI < INT
|
||||||
|
number of minimizers [3]
|
||||||
|
.TP
|
||||||
|
.BI -m \ INT
|
||||||
|
Discard chains with chaining score
|
||||||
|
.RI < INT
|
||||||
|
[40]. Chaining score equals the approximate number of matching bases (exact if
|
||||||
|
not using
|
||||||
|
.BR -H )
|
||||||
|
minus base-2 logarithm gap penalty. It is computed with dynamic programming.
|
||||||
|
.TP
|
||||||
|
.B -S
|
||||||
|
Perform all-vs-all mapping. In this mode, if the query sequence name is
|
||||||
|
lexicographically larger than the target sequence name, the hits between them
|
||||||
|
will be suppressed; if the query sequence name is the same as the target name,
|
||||||
|
diagonal minimizer hits will also be suppressed.
|
||||||
|
.TP
|
||||||
|
.BI -p \ FLOAT
|
||||||
|
Minimal secondary-to-primary score ratio to output secondary mappings [2].
|
||||||
|
Between two chains overlapping over half of the shorter chain (controlled by
|
||||||
|
.BR --mask-level ),
|
||||||
|
the chain with a lower score is secondary to the chain with a higher score.
|
||||||
|
If the ratio of the scores is below
|
||||||
|
.IR FLOAT ,
|
||||||
|
the secondary chain will not be output or extended with DP alignment later.
|
||||||
|
The default value suppresses all secondary chains.
|
||||||
|
.TP
|
||||||
|
.BI -D \ FLOAT
|
||||||
|
Discard a chain if the fraction of matching bases over the length of
|
||||||
|
query/target sequences in the chain is
|
||||||
|
.RI < FLOAT
|
||||||
|
[0].
|
||||||
|
.TP
|
||||||
|
.BI -x \ STR
|
||||||
|
Preset []. This option applies multiple options at the same time. It should be
|
||||||
|
applied before other options because options applied later will overwrite the
|
||||||
|
values set by
|
||||||
|
.BR -x .
|
||||||
|
Available
|
||||||
|
.I STR
|
||||||
|
are:
|
||||||
|
.RS
|
||||||
|
.TP 8
|
||||||
|
.B ava10k
|
||||||
|
PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -Sw5 -p0 -m100 -D.05)
|
||||||
|
.TP
|
||||||
|
.B map10k
|
||||||
|
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
|
||||||
|
.TP
|
||||||
|
.B asm1m
|
||||||
|
Long assembly to reference mapping (-k19 -w19)
|
||||||
|
.RE
|
||||||
|
|
||||||
|
.SS Alignment options
|
||||||
|
|
||||||
|
.TP 10
|
||||||
|
.BI -A \ INT
|
||||||
|
Matching score [1]
|
||||||
|
.TP
|
||||||
|
.BI -B \ INT
|
||||||
|
Mismatching penalty [2]
|
||||||
|
.TP
|
||||||
|
.BI -O \ INT
|
||||||
|
Gap open penalty [2]
|
||||||
|
.TP
|
||||||
|
.BI -E \ INT
|
||||||
|
Gap extension penalty [1]. A gap of length
|
||||||
|
.I l
|
||||||
|
costs
|
||||||
|
.RI {-O}+{-E}* l .
|
||||||
|
.TP
|
||||||
|
.BI -z \ INT
|
||||||
|
Break an alignment if the running score drops too quickly along the diagonal of
|
||||||
|
the DP matrix (diagonal X-drop, or Z-drop) [200]. Increasing the value improves
|
||||||
|
the contiguity of the alignment at the cost of poor alignment in the middle
|
||||||
|
(e.g. caused by a long inversion).
|
||||||
|
.TP
|
||||||
|
.BI -s \ INT
|
||||||
|
Minimal peak DP alignment score to output [40]. The peak score is computed from
|
||||||
|
the final CIGAR. It is the score of the max scoring segment in the alignment
|
||||||
|
and may be different from the total alignment score.
|
||||||
|
|
||||||
|
.SS Input/output options
|
||||||
|
|
||||||
|
.TP 10
|
||||||
|
.B -b
|
||||||
|
Generate CIGAR and output alignments in the SAM format. Minimap2 outputs in PAF
|
||||||
|
by default.
|
||||||
|
.TP
|
||||||
|
.B -c
|
||||||
|
Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag.
|
||||||
|
.TP
|
||||||
|
.BI -t \ INT
|
||||||
|
Number of threads [3]. Minimap2 uses at most three threads when collecting
|
||||||
|
minimizers on target sequences, and uses up to
|
||||||
|
.IR INT +1
|
||||||
|
threads when mapping (the extra thread is for I/O, which is frequently idle and
|
||||||
|
takes little CPU time).
|
||||||
|
.TP
|
||||||
|
.B -V
|
||||||
|
Print version number to stdout
|
||||||
|
|
||||||
.SH OUTPUT FORMAT
|
.SH OUTPUT FORMAT
|
||||||
|
.PP
|
||||||
|
Minimap2 outputs mapping positions in the Pairwise mApping Format (PAF) by
|
||||||
|
default. PAF is a TAB-delimited text format with each line consisting of at
|
||||||
|
least 12 fields, as described in the following table:
|
||||||
|
|
||||||
.TS
|
.TS
|
||||||
center box;
|
center box;
|
||||||
|
|
@ -139,6 +259,29 @@ _
|
||||||
12 int Mapping quality (0-255 with 255 for missing)
|
12 int Mapping quality (0-255 with 255 for missing)
|
||||||
.TE
|
.TE
|
||||||
|
|
||||||
|
.PP
|
||||||
|
When alignment is available, column 11 gives the total number of sequence
|
||||||
|
matches, mismatches and gaps in the alignment; column 10 divided by column 11
|
||||||
|
gives the BLAST-like alignment identity. When alignment is unavailable,
|
||||||
|
these two columns are approximate. PAF may optionally have additional fields in
|
||||||
|
the SAM-like typed key-value format. Minimap2 may output the following tags:
|
||||||
|
|
||||||
|
.TS
|
||||||
|
center box;
|
||||||
|
cb | cb | cb
|
||||||
|
r | c | l .
|
||||||
|
Tag Type Description
|
||||||
|
_
|
||||||
|
cm i Number of minimizers on the chain
|
||||||
|
s1 i Chaining score
|
||||||
|
s2 i Chaining score of the best secondary chain
|
||||||
|
NM i Total number of mismatches and gaps in the alignment
|
||||||
|
AS i DP alignment score
|
||||||
|
ms i DP score of the max scoring segment in the alignment
|
||||||
|
nn i Number of ambiguous bases in the alignment
|
||||||
|
cg Z CIGAR string (only in PAF)
|
||||||
|
.TE
|
||||||
|
|
||||||
.SH SEE ALSO
|
.SH SEE ALSO
|
||||||
.PP
|
.PP
|
||||||
miniasm(1), minimap(1), bwa(1).
|
miniasm(1), minimap(1), bwa(1).
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue