From ddc31f57ba6d7b7255c0cf633faafdd1ff7e8a44 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 2 Feb 2018 13:57:08 -0500 Subject: [PATCH] Documented some k8 scripts; more coming --- misc/README.md | 77 ++++++++++++++++++++++++++++++++++------------ misc/gff2bed.js | 4 ++- misc/paf2aln.js | 2 ++ misc/sam2paf.js | 4 ++- misc/sim-eval.js | 4 ++- misc/sim-mason2.js | 2 ++ misc/sim-pbsim.js | 2 ++ misc/splice2bed.js | 6 ++-- 8 files changed, 77 insertions(+), 24 deletions(-) mode change 100644 => 100755 misc/gff2bed.js mode change 100644 => 100755 misc/paf2aln.js mode change 100644 => 100755 misc/sam2paf.js mode change 100644 => 100755 misc/sim-eval.js mode change 100644 => 100755 misc/sim-mason2.js mode change 100644 => 100755 misc/sim-pbsim.js mode change 100644 => 100755 misc/splice2bed.js diff --git a/misc/README.md b/misc/README.md index c6e3371..c1dc3e2 100644 --- a/misc/README.md +++ b/misc/README.md @@ -1,28 +1,67 @@ -The [K8 Javascript shell][k8] is needed to run Javascripts in this directory. -Precompiled k8 binaries for Mac and Linux can be found at the [K8 release -page][k8bin]. +## Getting Started +```sh +curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf - +cp k8-0.2.4/k8-`uname -s` k8 # or better copy to a directory on PATH +minimap2 --cs test/MT-*.fa | paf2aln.js - | less # pretty print base alignment +sam2paf.js aln.sam.gz | less -S # convert SAM to PAF +gff2bed.js anno.gtf | less -S # convert GTF/GFF3 to BED12 +minimap2 -cx splice ref.fa rna-seq.fq | splice2bed.js - # convert splice aln to BED12 +``` -* [paf2aln.js](paf2aln.js): convert PAF to [MAF][maf] or BLAST-like output for - eyeballing. PAF has to be generated with minimap2 option `-S`, which writes - the aligned sequences to the `cs` tag. An example: - ```sh - ../minimap2 -S ../test/MT-*.fa | k8 paf2aln.js /dev/stdin - ``` +## Introduction -* [mapstat.js](mapstat.js): output basic statistics such as the number of - non-redundant mapped bases, number of split and secondary alignments and - number of long gaps. This scripts seamlessly works with both SAM and PAF. +This directory contains auxiliary scripts for format conversion, mapping +accuracy evaluation and miscellaneous purposes. These scripts *require* +the [k8 Javascript shell][k8] to run. On Linux or Mac, you can download +the precompiled k8 binary with: +```sh +curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf - +cp k8-0.2.4/k8-`uname -s` k8 +``` +It is highly recommended to copy the executable `k8` to a directory on your +`PATH` such as `/usr/bin/env` can find them. -* [sim-pbsim.js](sim-pbsim.js): convert reads simulated with [PBSIM][pbsim] to - FASTA and encode the true mapping positions to read names in a format like - `S1_33!chr1!225258409!225267761!-`. +## Use Cases -* [sim-eval.js](sim-eval.js): evaluate mapping accuracy for FASTA generated - with [sim-pbsim.js](sim-pbsim.js) or [sim-mason2.js](sim-mason2.js). +### paf2aln.js: convert PAF to other formats -* [sam2paf.js](sam2paf.js): convert SAM to PAF. +Script [paf2aln.js](paf2aln.js) converts PAF with the [cs tag][cs] to +[MAF][maf] or BLAST-like output. It only works with minimap2 output generated +using the `--cs` tag. +### Evaluating mapping accuracy with simulated reads + +Script [sim-pbsim.js](sim-pbsim.js) converts the MAF output of [pbsim][pbsim] +to FASTQ and encodes the true mapping position in the read name in a format like +`S1_33!chr1!225258409!225267761!-`. Similarly, script +[sim-mason2.js](sim-mason2.js) converts [mason2][mason2] simulated SAM to +FASTQ. + +Script [sim-eval.js](sim-eval.js) evaluates mapped SAM/PAF. +``` +Q 60 32478 0 0.000000000 32478 +Q 22 16 1 0.000030775 32494 +Q 21 43 1 0.000061468 32537 +Q 19 73 1 0.000091996 32610 +Q 14 66 1 0.000122414 32676 +Q 10 27 3 0.000214048 32703 +Q 8 14 1 0.000244521 32717 +Q 7 13 2 0.000305530 32730 +Q 6 46 1 0.000335611 32776 +Q 3 10 1 0.000366010 32786 +Q 2 20 2 0.000426751 32806 +Q 1 248 94 0.003267381 33054 +Q 0 31 17 0.003778147 33085 +U 3 +``` +where each Q-line gives the quality threshold, the number of reads mapped with +mapping quality equal to or greater than the threshold, number of wrong +mappings, accumulative mapping error rate and the accumulative number of +mapped reads. The U-line gives the number of unmapped reads if they are present +in the SAM file. + +[cs]: https://github.com/lh3/minimap2#cs [k8]: https://github.com/attractivechaos/k8 -[k8bin]: https://github.com/attractivechaos/k8/releases [maf]: https://genome.ucsc.edu/FAQ/FAQformat#format5 [pbsim]: https://github.com/pfaucon/PBSIM-PacBio-Simulator +[mason2]: https://github.com/seqan/seqan/tree/master/apps/mason2 diff --git a/misc/gff2bed.js b/misc/gff2bed.js old mode 100644 new mode 100755 index 71bdc52..7507ec9 --- a/misc/gff2bed.js +++ b/misc/gff2bed.js @@ -1,3 +1,5 @@ +#!/usr/bin/env k8 + var getopt = function(args, ostr) { var oli; // option letter list index if (typeof(getopt.place) == 'undefined') @@ -99,7 +101,7 @@ function print_bed12(exons, cds_st, cds_en, is_short) var re_gtf = /(transcript_id|transcript_type|transcript_biotype|gene_name|transcript_name) "([^"]+)";/g; var re_gff3 = /(transcript_id|transcript_type|transcript_biotype|gene_name|transcript_name)=([^;]+)/g; var buf = new Bytes(); -var file = new File(arguments[getopt.ind]); +var file = arguments[getopt.ind] == '-'? new File() : new File(arguments[getopt.ind]); var exons = [], cds_st = 1<<30, cds_en = 0, last_id = null; while (file.readline(buf) >= 0) { diff --git a/misc/paf2aln.js b/misc/paf2aln.js old mode 100644 new mode 100755 index 813c192..3837120 --- a/misc/paf2aln.js +++ b/misc/paf2aln.js @@ -1,3 +1,5 @@ +#!/usr/bin/env k8 + var getopt = function(args, ostr) { var oli; // option letter list index if (typeof(getopt.place) == 'undefined') diff --git a/misc/sam2paf.js b/misc/sam2paf.js old mode 100644 new mode 100755 index e0bcd2e..cdfcb54 --- a/misc/sam2paf.js +++ b/misc/sam2paf.js @@ -1,3 +1,5 @@ +#!/usr/bin/env k8 + var getopt = function(args, ostr) { var oli; // option letter list index if (typeof(getopt.place) == 'undefined') @@ -40,7 +42,7 @@ var c, pri_only = false; while ((c = getopt(arguments, "p")) != null) if (c == 'p') pri_only = true; -var file = arguments.length == getopt.ind? new File() : new File(arguments[getopt.ind]); +var file = arguments.length == getopt.ind || arguments[getopt.ind] == "-"? new File() : new File(arguments[getopt.ind]); var buf = new Bytes(); var re = /(\d+)([MIDSHNX=])/g; diff --git a/misc/sim-eval.js b/misc/sim-eval.js old mode 100644 new mode 100755 index 902ad85..88ff770 --- a/misc/sim-eval.js +++ b/misc/sim-eval.js @@ -1,3 +1,5 @@ +#!/usr/bin/env k8 + var getopt = function(args, ostr) { var oli; // option letter list index if (typeof(getopt.place) == 'undefined') @@ -44,7 +46,7 @@ while ((c = getopt(arguments, "Q:r:m:c")) != null) { else if (c == 'c') cap_short_mapq = true; } -var file = arguments.length == getopt.ind? new File() : new File(arguments[getopt.ind]); +var file = arguments.length == getopt.ind || arguments[getopt.ind] == '-'? new File() : new File(arguments[getopt.ind]); var buf = new Bytes(); var tot = [], err = []; diff --git a/misc/sim-mason2.js b/misc/sim-mason2.js old mode 100644 new mode 100755 index 5c66cab..2136236 --- a/misc/sim-mason2.js +++ b/misc/sim-mason2.js @@ -1,3 +1,5 @@ +#!/usr/bin/env k8 + Bytes.prototype.reverse = function() { for (var i = 0; i < this.length>>1; ++i) { diff --git a/misc/sim-pbsim.js b/misc/sim-pbsim.js old mode 100644 new mode 100755 index 1c92e21..b19390c --- a/misc/sim-pbsim.js +++ b/misc/sim-pbsim.js @@ -1,3 +1,5 @@ +#!/usr/bin/env k8 + Bytes.prototype.reverse = function() { for (var i = 0; i < this.length>>1; ++i) { diff --git a/misc/splice2bed.js b/misc/splice2bed.js old mode 100644 new mode 100755 index cdd965f..fd9ca91 --- a/misc/splice2bed.js +++ b/misc/splice2bed.js @@ -1,3 +1,5 @@ +#!/usr/bin/env k8 + var getopt = function(args, ostr) { var oli; // option letter list index if (typeof(getopt.place) == 'undefined') @@ -66,7 +68,7 @@ function main(args) { else if (c == 'n') fn_name_conv = getopt.arg; } if (getopt.ind == args.length) { - warn("Usage: k8 splice2bed.js "); + warn("Usage: k8 splice2bed.js |"); exit(1); } @@ -83,7 +85,7 @@ function main(args) { file.close(); } - var file = new File(args[getopt.ind]); + var file = args[getopt.ind] == '-'? new File() : new File(args[getopt.ind]); var buf = new Bytes(); var a = []; while (file.readline(buf) >= 0) {