From 6762368cf0fdee289f757abb267e7ed43867ed52 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 4 May 2019 14:00:31 -0400 Subject: [PATCH] r940: added the splice:hq preset for high-quality CCS/mRNA splice alignment --- README.md | 4 ++-- main.c | 2 +- minimap2.1 | 8 +++++++- options.c | 4 +++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 11088d9..88d0edc 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ cd minimap2 && make ./minimap2 -ax sr ref.fa read1.fa read2.fa > aln.sam # short genomic paired-end reads ./minimap2 -ax splice ref.fa rna-reads.fa > aln.sam # spliced long reads (strand unknown) ./minimap2 -ax splice -uf -k14 ref.fa reads.fa > aln.sam # noisy Nanopore Direct RNA-seq -./minimap2 -ax splice -uf -C5 ref.fa query.fa > aln.sam # Final PacBio Iso-seq or traditional cDNA +./minimap2 -ax splice:hq -uf ref.fa query.fa > aln.sam # Final PacBio Iso-seq or traditional cDNA ./minimap2 -cx asm5 asm1.fa asm2.fa > aln.paf # intra-species asm-to-asm alignment ./minimap2 -x ava-pb reads.fa reads.fa > overlaps.paf # PacBio read overlap ./minimap2 -x ava-ont reads.fa reads.fa > overlaps.paf # Nanopore read overlap @@ -139,7 +139,7 @@ Nanopore reads. 
#### Map long mRNA/cDNA reads ```sh -minimap2 -ax splice -uf -C5 ref.fa iso-seq.fq > aln.sam # PacBio Iso-seq/traditional cDNA +minimap2 -ax splice:hq -uf ref.fa iso-seq.fq > aln.sam # PacBio Iso-seq/traditional cDNA minimap2 -ax splice ref.fa nanopore-cdna.fa > aln.sam # Nanopore 2D cDNA-seq minimap2 -ax splice -uf -k14 ref.fa direct-rna.fq > aln.sam # Nanopore Direct RNA-seq minimap2 -ax splice --splice-flank=no SIRV.fa SIRV-seq.fa # mapping against SIRV control diff --git a/main.c b/main.c index 49b6ab1..3ed82cc 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.16-r937-dirty" +#define MM_VERSION "2.16-r940-dirty" #ifdef __linux__ #include <sys/resource.h> diff --git a/minimap2.1 b/minimap2.1 index 73e1154..ec31765 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -1,4 +1,4 @@ -.TH minimap2 1 "30 April 2019" "minimap2-2.16-dirty (r938)" "Bioinformatics tools" +.TH minimap2 1 "4 May 2019" "minimap2-2.16-dirty (r940)" "Bioinformatics tools" .SH NAME .PP minimap2 - mapping and alignment between collections of DNA sequences @@ -568,6 +568,12 @@ costs are different during chaining; 4) the computation of the .RB ` ms ' tag ignores introns to demote hits to pseudogenes. .TP +.B splice:hq +Long-read splice alignment for PacBio CCS reads +.RB ( -xsplice +.B -C5 -O6,24 +.BR -B4 ). 
+.TP .B sr Short single-end reads without splicing .RB ( -k21 diff --git a/options.c b/options.c index ac1e5e1..2e6a43a 100644 --- a/options.c +++ b/options.c @@ -120,7 +120,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mo->mid_occ = 1000; mo->max_occ = 5000; mo->mini_batch_size = 50000000; - } else if (strcmp(preset, "splice") == 0 || strcmp(preset, "cdna") == 0) { + } else if (strncmp(preset, "splice", 6) == 0 || strcmp(preset, "cdna") == 0) { io->flag = 0, io->k = 15, io->w = 5; mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK; mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000; @@ -128,6 +128,8 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mo->noncan = 9; mo->junc_bonus = 9; mo->zdrop = 200, mo->zdrop_inv = 100; // because mo->a is halved + if (strcmp(preset, "splice:hq") == 0) + mo->junc_bonus = 5, mo->b = 4, mo->q = 6, mo->q2 = 24; } else return -1; return 0; }