r748: optionally use the system getopt() (#134)

This commit is contained in:
Heng Li 2018-03-19 11:18:26 -04:00
parent b81d45510e
commit d1050f4eaf
2 changed files with 28 additions and 20 deletions

View File

@ -7,10 +7,10 @@
* [Evaluating mapping accuracy with simulated reads (for developers)](#mapeval) * [Evaluating mapping accuracy with simulated reads (for developers)](#mapeval)
- [Full-Genome Alignment](#genome-aln) - [Full-Genome Alignment](#genome-aln)
* [Intra-species assembly alignment](#asm-to-ref) * [Intra-species assembly alignment](#asm-to-ref)
* [Cross-species full-genome alignment](#x-species)
* [Eyeballing alignment](#view-aln)
* [Calling variants from assembly-to-reference alignment](#asm-var) * [Calling variants from assembly-to-reference alignment](#asm-var)
* [Lift Over](#liftover) * [Lift Over](#liftover)
* [Cross-species alignment](#x-species)
* [Print alignment](#view-aln)
- [Read Overlap](#read-overlap) - [Read Overlap](#read-overlap)
* [Long-read overlap](#long-read-overlap) * [Long-read overlap](#long-read-overlap)
* [Evaluating overlap sensitivity (for developers)](#ov-eval) * [Evaluating overlap sensitivity (for developers)](#ov-eval)
@ -79,7 +79,7 @@ paftools.js pbsim2fq ../ecoli_ref.fa.fai sd_0001.maf > ../ecoli_pbsim.fa
# mason2 simulation # mason2 simulation
mason_simulator --illumina-prob-mismatch-scale 2.5 -ir ecoli_ref.fa -n 10000 -o tmp-l.fq -or tmp-r.fq -oa tmp.sam mason_simulator --illumina-prob-mismatch-scale 2.5 -ir ecoli_ref.fa -n 10000 -o tmp-l.fq -or tmp-r.fq -oa tmp.sam
paftools.js mason2fq tmp.sam | seqtk seq -1 > ecoli_mason_1.fq paftools.js mason2fq tmp.sam | seqtk seq -1 > ecoli_mason_1.fq
paftools.js mason2fq tmp.sam | seqtk seq -1 > ecoli_mason_2.fq paftools.js mason2fq tmp.sam | seqtk seq -2 > ecoli_mason_2.fq
``` ```
@ -95,6 +95,24 @@ Here `ecoli_canu.fa` is the Canu assembly of `ecoli_p6_25x_canu.fa`. This
command line outputs alignments in the [PAF format][paf]. Use `-a` instead of command line outputs alignments in the [PAF format][paf]. Use `-a` instead of
`-c` to get output in the SAM format. `-c` to get output in the SAM format.
### <a name="x-species"></a>Cross-species full-genome alignment
```sh
minimap2 -cx asm20 --cs ecoli_ref.fa ecoli_O104:H4.fa > ecoli_O104:H4.paf
sort -k6,6 -k8,8n ecoli_O104:H4.paf | paftools.js call -f ecoli_ref.fa -L10000 -l1000 - > out.vcf
```
Minimap2 has three presets for full-genome alignment: "asm5" for sequence
divergence below 1%, "asm10" for divergence of around a couple of percent, and
"asm20" for divergence of no more than 10%. In theory, with the right settings,
minimap2 should work for sequence pairs with sequence divergence up to ~15%,
but this has not been carefully evaluated.
### <a name="view-aln"></a>Eyeballing alignment
```sh
# option "--cs" required; minimap2-r741 or higher required for the "asm20" preset
minimap2 -cx asm20 --cs ecoli_ref.fa ecoli_O104:H4.fa | paftools.js view - | less -S
```
This prints the alignment in a BLAST-like format.
### <a name="asm-var"></a>Calling variants from assembly-to-reference alignment ### <a name="asm-var"></a>Calling variants from assembly-to-reference alignment
```sh ```sh
# don't forget the "--cs" option; otherwise it doesn't work # don't forget the "--cs" option; otherwise it doesn't work
@ -108,27 +126,13 @@ This information is not available in the VCF output.
### <a name="liftover"></a>Lift over ### <a name="liftover"></a>Lift over
```sh ```sh
minimap2 -cx asm5 --cs ecoli_ref.fa ecoli_canu.fa > ecoli_canu.paf
echo -e 'tig00000001\t200000\t300000' | paftools.js liftover ecoli_canu.paf - echo -e 'tig00000001\t200000\t300000' | paftools.js liftover ecoli_canu.paf -
``` ```
This lifts over a region on query sequences to one or multiple regions on This lifts over a region on query sequences to one or multiple regions on
reference sequences. Note that this paftools.js command may not be efficient reference sequences. Note that this paftools.js command may not be efficient
enough to lift millions of regions. enough to lift millions of regions.
### <a name="x-species"></a>Cross-species alignment
```sh
minimap2 -cx asm20 --cs ecoli_ref.fa ecoli_O104:H4.fa > ecoli_O104:H4.paf
sort -k6,6 -k8,8n ecoli_O104:H4.paf | paftools.js call -f ecoli_ref.fa -L10000 -l1000 - > out.vcf
```
Minimap2 only works when the sequence divergence is no more than ~15%.
### <a name="view-aln"></a>Print alignment
```sh
# option "--cs" required; minimap2-r741 or higher required for the "asm20" preset
minimap2 -cx asm20 --cs ecoli_ref.fa ecoli_O104:H4.fa | paftools.js view - | less -S
```
This prints the alignment in a BLAST-like format.
## <a name="read-overlap"></a>Read Overlap ## <a name="read-overlap"></a>Read Overlap

8
main.c
View File

@ -4,9 +4,13 @@
#include "bseq.h" #include "bseq.h"
#include "minimap.h" #include "minimap.h"
#include "mmpriv.h" #include "mmpriv.h"
#ifdef HAVE_GETOPT
#include <getopt.h>
#else
#include "getopt.h" #include "getopt.h"
#endif
#define MM_VERSION "2.9-r741-dirty" #define MM_VERSION "2.9-r748-dirty"
#ifdef __linux__ #ifdef __linux__
#include <sys/resource.h> #include <sys/resource.h>
@ -111,7 +115,7 @@ int main(int argc, char *argv[])
} }
break; break;
} }
optreset = 1; optind = 0; // for musl getopt, optind=0 has the same effect as optreset=1; older libc doesn't have optreset
while ((c = getopt_long(argc, argv, opt_str, long_options, &long_idx)) >= 0) { while ((c = getopt_long(argc, argv, opt_str, long_options, &long_idx)) >= 0) {
if (c == 'w') ipt.w = atoi(optarg); if (c == 'w') ipt.w = atoi(optarg);