Documented some k8 scripts; more coming
This commit is contained in:
parent
35d3e064bf
commit
ddc31f57ba
|
|
@ -1,28 +1,67 @@
|
|||
The [K8 Javascript shell][k8] is needed to run Javascripts in this directory.
|
||||
Precompiled k8 binaries for Mac and Linux can be found at the [K8 release
|
||||
page][k8bin].
|
||||
## <a name="started"></a>Getting Started
|
||||
```sh
|
||||
curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf -
|
||||
cp k8-0.2.4/k8-`uname -s` k8 # or better copy to a directory on PATH
|
||||
minimap2 --cs test/MT-*.fa | paf2aln.js - | less # pretty print base alignment
|
||||
sam2paf.js aln.sam.gz | less -S # convert SAM to PAF
|
||||
gff2bed.js anno.gtf | less -S # convert GTF/GFF3 to BED12
|
||||
minimap2 -cx splice ref.fa rna-seq.fq | splice2bed.js - # convert splice aln to BED12
|
||||
```
|
||||
|
||||
* [paf2aln.js](paf2aln.js): convert PAF to [MAF][maf] or BLAST-like output for
|
||||
eyeballing. PAF has to be generated with minimap2 option `-S`, which writes
|
||||
the aligned sequences to the `cs` tag. An example:
|
||||
```sh
|
||||
../minimap2 -S ../test/MT-*.fa | k8 paf2aln.js /dev/stdin
|
||||
```
|
||||
## Introduction
|
||||
|
||||
* [mapstat.js](mapstat.js): output basic statistics such as the number of
|
||||
non-redundant mapped bases, number of split and secondary alignments and
|
||||
number of long gaps. This scripts seamlessly works with both SAM and PAF.
|
||||
This directory contains auxiliary scripts for format conversion, mapping
|
||||
accuracy evaluation and miscellaneous purposes. These scripts *require*
|
||||
the [k8 Javascript shell][k8] to run. On Linux or Mac, you can download
|
||||
the precompiled k8 binary with:
|
||||
```sh
|
||||
curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf -
|
||||
cp k8-0.2.4/k8-`uname -s` k8
|
||||
```
|
||||
It is highly recommended to copy the executable `k8` to a directory on your
|
||||
`PATH` such as `/usr/bin/env` can find them.
|
||||
|
||||
* [sim-pbsim.js](sim-pbsim.js): convert reads simulated with [PBSIM][pbsim] to
|
||||
FASTA and encode the true mapping positions to read names in a format like
|
||||
`S1_33!chr1!225258409!225267761!-`.
|
||||
## Use Cases
|
||||
|
||||
* [sim-eval.js](sim-eval.js): evaluate mapping accuracy for FASTA generated
|
||||
with [sim-pbsim.js](sim-pbsim.js) or [sim-mason2.js](sim-mason2.js).
|
||||
### paf2aln.js: convert PAF to other formats
|
||||
|
||||
* [sam2paf.js](sam2paf.js): convert SAM to PAF.
|
||||
Script [paf2aln.js](paf2aln.js) converts PAF with the [cs tag][cs] to
|
||||
[MAF][maf] or BLAST-like output. It only works with minimap2 output generated
|
||||
using the `--cs` tag.
|
||||
|
||||
### Evaluating mapping accuracy with simulated reads
|
||||
|
||||
Script [sim-pbsim.js](sim-pbsim.js) converts the MAF output of [pbsim][pbsim]
|
||||
to FASTQ and encodes the true mapping position in the read name in a format like
|
||||
`S1_33!chr1!225258409!225267761!-`. Similarly, script
|
||||
[sim-mason2.js](sim-mason2.js) converts [mason2][mason2] simulated SAM to
|
||||
FASTQ.
|
||||
|
||||
Script [sim-eval.js](sim-eval.js) evaluates mapped SAM/PAF.
|
||||
```
|
||||
Q 60 32478 0 0.000000000 32478
|
||||
Q 22 16 1 0.000030775 32494
|
||||
Q 21 43 1 0.000061468 32537
|
||||
Q 19 73 1 0.000091996 32610
|
||||
Q 14 66 1 0.000122414 32676
|
||||
Q 10 27 3 0.000214048 32703
|
||||
Q 8 14 1 0.000244521 32717
|
||||
Q 7 13 2 0.000305530 32730
|
||||
Q 6 46 1 0.000335611 32776
|
||||
Q 3 10 1 0.000366010 32786
|
||||
Q 2 20 2 0.000426751 32806
|
||||
Q 1 248 94 0.003267381 33054
|
||||
Q 0 31 17 0.003778147 33085
|
||||
U 3
|
||||
```
|
||||
where each Q-line gives the quality threshold, the number of reads mapped with
|
||||
mapping quality equal to or greater than the threshold, number of wrong
|
||||
mappings, accumulative mapping error rate and the accumulative number of
|
||||
mapped reads. The U-line gives the number of unmapped reads if they are present
|
||||
in the SAM file.
|
||||
|
||||
[cs]: https://github.com/lh3/minimap2#cs
|
||||
[k8]: https://github.com/attractivechaos/k8
|
||||
[k8bin]: https://github.com/attractivechaos/k8/releases
|
||||
[maf]: https://genome.ucsc.edu/FAQ/FAQformat#format5
|
||||
[pbsim]: https://github.com/pfaucon/PBSIM-PacBio-Simulator
|
||||
[mason2]: https://github.com/seqan/seqan/tree/master/apps/mason2
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env k8
|
||||
|
||||
var getopt = function(args, ostr) {
|
||||
var oli; // option letter list index
|
||||
if (typeof(getopt.place) == 'undefined')
|
||||
|
|
@ -99,7 +101,7 @@ function print_bed12(exons, cds_st, cds_en, is_short)
|
|||
var re_gtf = /(transcript_id|transcript_type|transcript_biotype|gene_name|transcript_name) "([^"]+)";/g;
|
||||
var re_gff3 = /(transcript_id|transcript_type|transcript_biotype|gene_name|transcript_name)=([^;]+)/g;
|
||||
var buf = new Bytes();
|
||||
var file = new File(arguments[getopt.ind]);
|
||||
var file = arguments[getopt.ind] == '-'? new File() : new File(arguments[getopt.ind]);
|
||||
|
||||
var exons = [], cds_st = 1<<30, cds_en = 0, last_id = null;
|
||||
while (file.readline(buf) >= 0) {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env k8
|
||||
|
||||
var getopt = function(args, ostr) {
|
||||
var oli; // option letter list index
|
||||
if (typeof(getopt.place) == 'undefined')
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env k8
|
||||
|
||||
var getopt = function(args, ostr) {
|
||||
var oli; // option letter list index
|
||||
if (typeof(getopt.place) == 'undefined')
|
||||
|
|
@ -40,7 +42,7 @@ var c, pri_only = false;
|
|||
while ((c = getopt(arguments, "p")) != null)
|
||||
if (c == 'p') pri_only = true;
|
||||
|
||||
var file = arguments.length == getopt.ind? new File() : new File(arguments[getopt.ind]);
|
||||
var file = arguments.length == getopt.ind || arguments[getopt.ind] == "-"? new File() : new File(arguments[getopt.ind]);
|
||||
var buf = new Bytes();
|
||||
var re = /(\d+)([MIDSHNX=])/g;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env k8
|
||||
|
||||
var getopt = function(args, ostr) {
|
||||
var oli; // option letter list index
|
||||
if (typeof(getopt.place) == 'undefined')
|
||||
|
|
@ -44,7 +46,7 @@ while ((c = getopt(arguments, "Q:r:m:c")) != null) {
|
|||
else if (c == 'c') cap_short_mapq = true;
|
||||
}
|
||||
|
||||
var file = arguments.length == getopt.ind? new File() : new File(arguments[getopt.ind]);
|
||||
var file = arguments.length == getopt.ind || arguments[getopt.ind] == '-'? new File() : new File(arguments[getopt.ind]);
|
||||
var buf = new Bytes();
|
||||
|
||||
var tot = [], err = [];
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env k8
|
||||
|
||||
Bytes.prototype.reverse = function()
|
||||
{
|
||||
for (var i = 0; i < this.length>>1; ++i) {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env k8
|
||||
|
||||
Bytes.prototype.reverse = function()
|
||||
{
|
||||
for (var i = 0; i < this.length>>1; ++i) {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env k8
|
||||
|
||||
var getopt = function(args, ostr) {
|
||||
var oli; // option letter list index
|
||||
if (typeof(getopt.place) == 'undefined')
|
||||
|
|
@ -66,7 +68,7 @@ function main(args) {
|
|||
else if (c == 'n') fn_name_conv = getopt.arg;
|
||||
}
|
||||
if (getopt.ind == args.length) {
|
||||
warn("Usage: k8 splice2bed.js <in.paf>");
|
||||
warn("Usage: k8 splice2bed.js <in.paf>|<in.sam>");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
|
@ -83,7 +85,7 @@ function main(args) {
|
|||
file.close();
|
||||
}
|
||||
|
||||
var file = new File(args[getopt.ind]);
|
||||
var file = args[getopt.ind] == '-'? new File() : new File(args[getopt.ind]);
|
||||
var buf = new Bytes();
|
||||
var a = [];
|
||||
while (file.readline(buf) >= 0) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue