Merge branch 'dev'

This commit is contained in:
Heng Li 2017-08-08 21:30:32 -04:00
commit 3dbe23b34e
30 changed files with 4436 additions and 28 deletions

5
.travis.yml 100644
View File

@ -0,0 +1,5 @@
language: c
compiler:
- gcc
- clang
script: make

View File

@ -1,7 +1,9 @@
#include <stdarg.h> #include <stdarg.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <assert.h>
#include <stdio.h> #include <stdio.h>
#include "kalloc.h"
#include "mmpriv.h" #include "mmpriv.h"
static inline void str_enlarge(kstring_t *s, int l) static inline void str_enlarge(kstring_t *s, int l)
@ -54,6 +56,63 @@ static void mm_sprintf_lite(kstring_t *s, const char *fmt, ...)
s->s[s->l] = 0; s->s[s->l] = 0;
} }
static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r)
{
extern unsigned char seq_nt4_table[256];
int i, q_off, t_off;
uint8_t *qseq, *tseq;
char *tmp;
if (r->p == 0) return;
mm_sprintf_lite(s, "\tcs:Z:");
qseq = (uint8_t*)kmalloc(km, r->qe - r->qs);
tseq = (uint8_t*)kmalloc(km, r->re - r->rs);
tmp = (char*)kmalloc(km, r->re - r->rs > r->qe - r->qs? r->re - r->rs + 1 : r->qe - r->qs + 1);
mm_idx_getseq(mi, r->rid, r->rs, r->re, tseq);
if (!r->rev) {
for (i = r->qs; i < r->qe; ++i)
qseq[i - r->qs] = seq_nt4_table[(uint8_t)t->seq[i]];
} else {
for (i = r->qs; i < r->qe; ++i) {
uint8_t c = seq_nt4_table[(uint8_t)t->seq[i]];
qseq[r->qe - i - 1] = c >= 4? 4 : 3 - c;
}
}
for (i = q_off = t_off = 0; i < r->p->n_cigar; ++i) {
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
assert(op >= 0 && op <= 2);
if (op == 0) {
int l_tmp = 0;
for (j = 0; j < len; ++j) {
if (qseq[q_off + j] != tseq[t_off + j]) {
if (l_tmp > 0) {
tmp[l_tmp] = 0;
mm_sprintf_lite(s, "=%s", tmp);
l_tmp = 0;
}
mm_sprintf_lite(s, "*%c%c", "acgtn"[tseq[t_off + j]], "acgtn"[qseq[q_off + j]]);
} else tmp[l_tmp++] = "ACGTN"[qseq[q_off + j]];
}
if (l_tmp > 0) {
tmp[l_tmp] = 0;
mm_sprintf_lite(s, "=%s", tmp);
}
q_off += len, t_off += len;
} else if (op == 1) {
for (j = 0, tmp[len] = 0; j < len; ++j)
tmp[j] = "acgtn"[qseq[q_off + j]];
mm_sprintf_lite(s, "+%s", tmp);
q_off += len;
} else if (op == 2) {
for (j = 0, tmp[len] = 0; j < len; ++j)
tmp[j] = "acgtn"[tseq[t_off + j]];
mm_sprintf_lite(s, "-%s", tmp);
t_off += len;
}
}
assert(t_off == r->re - r->rs && q_off == r->qe - r->qs);
kfree(km, qseq); kfree(km, tseq); kfree(km, tmp);
}
static inline void write_tags(kstring_t *s, const mm_reg1_t *r) static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
{ {
int type = r->inv? 'I' : r->id == r->parent? 'P' : 'S'; int type = r->inv? 'I' : r->id == r->parent? 'P' : 'S';
@ -63,7 +122,7 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi); if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
} }
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r) void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag)
{ {
s->l = 0; s->l = 0;
mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]); mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]);
@ -74,12 +133,14 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
else mm_sprintf_lite(s, "\t%d\t%d", r->fuzzy_mlen, r->fuzzy_blen); else mm_sprintf_lite(s, "\t%d\t%d", r->fuzzy_mlen, r->fuzzy_blen);
mm_sprintf_lite(s, "\t%d", r->mapq); mm_sprintf_lite(s, "\t%d", r->mapq);
write_tags(s, r); write_tags(s, r);
if (r->p) { if (r->p && (opt_flag & MM_F_OUT_CG)) {
uint32_t k; uint32_t k;
mm_sprintf_lite(s, "\tcg:Z:"); mm_sprintf_lite(s, "\tcg:Z:");
for (k = 0; k < r->p->n_cigar; ++k) for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]); mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]);
} }
if (r->p && (opt_flag & MM_F_OUT_CS))
write_cs(km, s, mi, t, r);
} }
static char comp_tab[] = { static char comp_tab[] = {

View File

@ -1,5 +1,6 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <assert.h>
#include "ksw2.h" #include "ksw2.h"
#ifdef __SSE2__ #ifdef __SSE2__
@ -66,7 +67,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
if (w < 0) w = tlen > qlen? tlen : qlen; if (w < 0) w = tlen > qlen? tlen : qlen;
wl = wr = w; wl = wr = w;
tlen_ = (tlen + 15) / 16; tlen_ = (tlen + 15) / 16;
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1; n_col_ = qlen < tlen? qlen : tlen;
n_col_ = ((n_col_ < w + 1? n_col_ : w + 1) + 15) / 16 + 1;
qlen_ = (qlen + 15) / 16; qlen_ = (qlen + 15) / 16;
for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) { for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) {
max_sc = max_sc > mat[t]? max_sc : mat[t]; max_sc = max_sc > mat[t]? max_sc : mat[t];
@ -161,6 +163,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
x21_ = _mm_cvtsi32_si128((uint8_t)x21); x21_ = _mm_cvtsi32_si128((uint8_t)x21);
v1_ = _mm_cvtsi32_si128((uint8_t)v1); v1_ = _mm_cvtsi32_si128((uint8_t)v1);
st_ = st / 16, en_ = en / 16; st_ = st / 16, en_ = en / 16;
assert(en_ - st_ + 1 <= n_col_);
if (!with_cigar) { // score only if (!with_cigar) { // score only
for (t = st_; t <= en_; ++t) { for (t = st_; t <= en_; ++t) {
__m128i z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp; __m128i z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;

View File

@ -1,4 +1,5 @@
#include <string.h> #include <string.h>
#include <assert.h>
#include "ksw2.h" #include "ksw2.h"
#ifdef __SSE2__ #ifdef __SSE2__
@ -58,7 +59,8 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
if (w < 0) w = tlen > qlen? tlen : qlen; if (w < 0) w = tlen > qlen? tlen : qlen;
wl = wr = w; wl = wr = w;
tlen_ = (tlen + 15) / 16; tlen_ = (tlen + 15) / 16;
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1; n_col_ = qlen < tlen? qlen : tlen;
n_col_ = ((n_col_ < w + 1? n_col_ : w + 1) + 15) / 16 + 1;
qlen_ = (qlen + 15) / 16; qlen_ = (qlen + 15) / 16;
for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) { for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) {
max_sc = max_sc > mat[t]? max_sc : mat[t]; max_sc = max_sc > mat[t]? max_sc : mat[t];
@ -130,6 +132,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
x1_ = _mm_cvtsi32_si128(x1); x1_ = _mm_cvtsi32_si128(x1);
v1_ = _mm_cvtsi32_si128(v1); v1_ = _mm_cvtsi32_si128(v1);
st_ = st / 16, en_ = en / 16; st_ = st / 16, en_ = en / 16;
assert(en_ - st_ + 1 <= n_col_);
if (!with_cigar) { // score only if (!with_cigar) { // score only
for (t = st_; t <= en_; ++t) { for (t = st_; t <= en_; ++t) {
__m128i z, a, b, xt1, vt1, ut, tmp; __m128i z, a, b, xt1, vt1, ut, tmp;

8
main.c
View File

@ -8,7 +8,7 @@
#include "minimap.h" #include "minimap.h"
#include "mmpriv.h" #include "mmpriv.h"
#define MM_VERSION "2.0-r275" #define MM_VERSION "2.0-r276-dirty"
void liftrlimit() void liftrlimit()
{ {
@ -54,7 +54,7 @@ int main(int argc, char *argv[])
mm_realtime0 = realtime(); mm_realtime0 = realtime();
mm_mapopt_init(&opt); mm_mapopt_init(&opt);
while ((c = getopt_long(argc, argv, "aw:k:K:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Q", long_options, &long_idx)) >= 0) { while ((c = getopt_long(argc, argv, "aSw:k:K:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Q", long_options, &long_idx)) >= 0) {
if (c == 'w') w = atoi(optarg), idx_par_set = 1; if (c == 'w') w = atoi(optarg), idx_par_set = 1;
else if (c == 'k') k = atoi(optarg), idx_par_set = 1; else if (c == 'k') k = atoi(optarg), idx_par_set = 1;
else if (c == 'H') is_hpc = 1, idx_par_set = 1; else if (c == 'H') is_hpc = 1, idx_par_set = 1;
@ -67,7 +67,8 @@ int main(int argc, char *argv[])
else if (c == 'N') opt.best_n = atoi(optarg); else if (c == 'N') opt.best_n = atoi(optarg);
else if (c == 'p') opt.pri_ratio = atof(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg);
else if (c == 'M') opt.mask_level = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg);
else if (c == 'c') opt.flag |= MM_F_CIGAR; else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR;
else if (c == 'S') opt.flag |= MM_F_OUT_CS | MM_F_CIGAR;
else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF; else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR; else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
else if (c == 'Q') opt.flag |= MM_F_NO_QUAL; else if (c == 'Q') opt.flag |= MM_F_NO_QUAL;
@ -166,6 +167,7 @@ int main(int argc, char *argv[])
fprintf(stderr, " -Q ignore base quality in the input\n"); fprintf(stderr, " -Q ignore base quality in the input\n");
fprintf(stderr, " -a output in the SAM format (PAF by default)\n"); fprintf(stderr, " -a output in the SAM format (PAF by default)\n");
fprintf(stderr, " -c output CIGAR in PAF\n"); fprintf(stderr, " -c output CIGAR in PAF\n");
fprintf(stderr, " -S output the cs tag in PAF\n");
fprintf(stderr, " -t INT number of threads [%d]\n", n_threads); fprintf(stderr, " -t INT number of threads [%d]\n", n_threads);
fprintf(stderr, " -K NUM minibatch size [200M]\n"); fprintf(stderr, " -K NUM minibatch size [200M]\n");
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);

5
map.c
View File

@ -338,16 +338,18 @@ static void *worker_pipeline(void *shared, int step, void *in)
kt_for(p->n_threads, worker_for, in, ((step_t*)in)->n_seq); kt_for(p->n_threads, worker_for, in, ((step_t*)in)->n_seq);
return in; return in;
} else if (step == 2) { // step 2: output } else if (step == 2) { // step 2: output
void *km = 0;
step_t *s = (step_t*)in; step_t *s = (step_t*)in;
const mm_idx_t *mi = p->mi; const mm_idx_t *mi = p->mi;
for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]); for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]);
free(s->buf); free(s->buf);
if ((p->opt->flag & MM_F_OUT_CS) && !(mm_dbg_flag & MM_DBG_NO_KALLOC)) km = km_init();
for (i = 0; i < s->n_seq; ++i) { for (i = 0; i < s->n_seq; ++i) {
mm_bseq1_t *t = &s->seq[i]; mm_bseq1_t *t = &s->seq[i];
for (j = 0; j < s->n_reg[i]; ++j) { for (j = 0; j < s->n_reg[i]; ++j) {
mm_reg1_t *r = &s->reg[i][j]; mm_reg1_t *r = &s->reg[i][j];
if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, r, s->n_reg[i], s->reg[i]); if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, r, s->n_reg[i], s->reg[i]);
else mm_write_paf(&p->str, mi, t, r); else mm_write_paf(&p->str, mi, t, r, km, p->opt->flag);
puts(p->str.s); puts(p->str.s);
} }
if (s->n_reg[i] == 0 && (p->opt->flag & MM_F_OUT_SAM)) { if (s->n_reg[i] == 0 && (p->opt->flag & MM_F_OUT_SAM)) {
@ -360,6 +362,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
if (s->seq[i].qual) free(s->seq[i].qual); if (s->seq[i].qual) free(s->seq[i].qual);
} }
free(s->reg); free(s->n_reg); free(s->seq); free(s->reg); free(s->n_reg); free(s->seq);
km_destroy(km);
if (mm_verbose >= 3) if (mm_verbose >= 3)
fprintf(stderr, "[M::%s::%.3f*%.2f] mapped %d sequences\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), s->n_seq); fprintf(stderr, "[M::%s::%.3f*%.2f] mapped %d sequences\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), s->n_seq);
free(s); free(s);

View File

@ -12,6 +12,8 @@
#define MM_F_CIGAR 0x04 #define MM_F_CIGAR 0x04
#define MM_F_OUT_SAM 0x08 #define MM_F_OUT_SAM 0x08
#define MM_F_NO_QUAL 0x10 #define MM_F_NO_QUAL 0x10
#define MM_F_OUT_CG 0x20
#define MM_F_OUT_CS 0x40
#define MM_IDX_MAGIC "MMI\2" #define MM_IDX_MAGIC "MMI\2"

28
misc/README.md 100644
View File

@ -0,0 +1,28 @@
The [K8 Javascript shell][k8] is needed to run Javascripts in this directory.
Precompiled k8 binaries for Mac and Linux can be found at the [K8 release
page][k8bin].
* [paf2aln.js](paf2aln.js): convert PAF to [MAF][maf] or BLAST-like output for
eyeballing. PAF has to be generated with minimap2 option `-S`, which writes
the aligned sequences to the `cs` tag. An example:
```sh
../minimap2 -S ../test/MT-*.fa | k8 paf2aln.js /dev/stdin
```
* [mapstat.js](mapstat.js): output basic statistics such as the number of
non-redundant mapped bases, number of split and secondary alignments and
number of long gaps. This scripts seamlessly works with both SAM and PAF.
* [sim-pbsim.js](sim-pbsim.js): convert reads simulated with [PBSIM][pbsim] to
FASTA and encode the true mapping positions to read names in a format like
`S1_33!chr1!225258409!225267761!-`.
* [sim-eval.js](sim-eval.js): evaluate mapping accuracy for FASTA generated
with [sim-pbsim.js](sim-pbsim.js) or [sim-mason2.js](sim-mason2.js).
* [sam2paf.js](sam2paf.js): convert SAM to PAF.
[k8]: https://github.com/attractivechaos/k8
[k8bin]: https://github.com/attractivechaos/k8/releases
[maf]: https://genome.ucsc.edu/FAQ/FAQformat#format5
[pbsim]: https://github.com/pfaucon/PBSIM-PacBio-Simulator

171
misc/paf2aln.js 100644
View File

@ -0,0 +1,171 @@
var getopt = function(args, ostr) {
var oli; // option letter list index
if (typeof(getopt.place) == 'undefined')
getopt.ind = 0, getopt.arg = null, getopt.place = -1;
if (getopt.place == -1) { // update scanning pointer
if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
getopt.place = -1;
return null;
}
if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
++getopt.ind;
getopt.place = -1;
return null;
}
}
var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null.
if (getopt.place < 0) ++getopt.ind;
return '?';
}
if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
getopt.arg = null;
if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
} else { // need an argument
if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
getopt.arg = args[getopt.ind].substr(getopt.place);
else if (args.length <= ++getopt.ind) { // no arg
getopt.place = -1;
if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
return '?';
} else getopt.arg = args[getopt.ind]; // white space
getopt.place = -1;
++getopt.ind;
}
return optopt;
}
var c, maf_out = false, line_len = 80;
while ((c = getopt(arguments, "ml:")) != null) {
if (c == 'm') maf_out = true;
else if (c == 'l') line_len = parseInt(getopt.arg); // TODO: not implemented yet
}
if (line_len == 0) line_len = 0x7fffffff;
if (getopt.ind == arguments.length) {
print("Usage: k8 paf2aln.js [options] <with-cs.paf>");
print("Options:");
print(" -m MAF output (BLAST-like output by default)");
print(" -l INT line length in BLAST-like output [80]");
print("");
print("Note: this script only works when minimap2 is run with option '-S'");
exit(1);
}
function padding_str(x, len, right)
{
var s = x.toString();
if (s.length < len) {
if (right) s += Array(len - s.length + 1).join(" ");
else s = Array(len - s.length + 1).join(" ") + s;
}
return s;
}
function update_aln(s_ref, s_qry, s_mid, type, seq, slen)
{
var l = type == '*'? 1 : seq.length;
if (type == '=') {
s_ref.set(seq);
s_qry.set(seq);
s_mid.set(Array(l+1).join("|"));
slen[0] += l, slen[1] += l;
} else if (type == '*') {
s_ref.set(seq.charAt(0));
s_qry.set(seq.charAt(1));
s_mid.set(' ');
slen[0] += 1, slen[1] += 1;
} else if (type == '+') {
s_ref.set(Array(l+1).join("-"));
s_qry.set(seq);
s_mid.set(Array(l+1).join(" "));
slen[1] += l;
} else if (type == '-') {
s_ref.set(seq);
s_qry.set(Array(l+1).join("-"));
s_mid.set(Array(l+1).join(" "));
slen[0] += l;
}
}
function print_aln(rs, qs, strand, slen, elen, s_ref, s_qry, s_mid)
{
print(["Ref+:", padding_str(rs + slen[0] + 1, 10, false), s_ref.toString(), padding_str(rs + elen[0], 10, true)].join(" "));
print(" " + s_mid.toString());
var st, en;
if (strand == '+') st = qs + slen[1] + 1, en = qs + elen[1];
else st = qs - slen[1], en = qs - elen[1] + 1;
print(["Qry" + strand + ":", padding_str(st, 10, false), s_qry.toString(), padding_str(en , 10, true)].join(" "));
}
var s_ref = new Bytes(), s_qry = new Bytes(), s_mid = new Bytes();
var re = /([=\-\+\*])([A-Za-z]+)/g;
var buf = new Bytes();
var file = new File(arguments[getopt.ind]);
if (maf_out) print("##maf version=1\n");
while (file.readline(buf) >= 0) {
var m, line = buf.toString();
var t = line.split("\t", 12);
if ((m = /\tcs:Z:(\S+)/.exec(line)) == null) continue;
var cs = m[1];
s_ref.length = s_qry.length = s_mid.length = 0;
var slen = [0, 0], elen = [0, 0];
if (maf_out) {
while ((m = re.exec(cs)) != null)
update_aln(s_ref, s_qry, s_mid, m[1], m[2], elen);
if (maf_out) {
var score = (m = /\tAS:i:(\d+)/.exec(line)) != null? parseInt(m[1]) : 0;
var len = t[0].length > t[5].length? t[0].length : t[5].length;
print("a " + score);
print(["s", padding_str(t[5], len, true), padding_str(t[7], 10, false), padding_str(parseInt(t[8]) - parseInt(t[7]), 10, false),
"+", padding_str(t[6], 10, false), s_ref.toString()].join(" "));
var qs, qe, ql = parseInt(t[1]);
if (t[4] == '+') {
qs = parseInt(t[2]);
qe = parseInt(t[3]);
} else {
qs = ql - parseInt(t[3]);
qe = ql - parseInt(t[2]);
}
print(["s", padding_str(t[0], len, true), padding_str(qs, 10, false), padding_str(qe - qs, 10, false),
t[4], padding_str(ql, 10, false), s_qry.toString()].join(" "));
print("");
}
} else {
line = line.replace(/\tc[sg]:Z:\S+/g, "");
print('>' + line);
var rs = parseInt(t[7]), qs = t[4] == '+'? parseInt(t[2]) : parseInt(t[3]);
var n_blocks = 0;
while ((m = re.exec(cs)) != null) {
var start = 0, rest = m[1] == '*'? 1 : m[2].length;
while (rest > 0) {
var l_proc;
if (s_ref.length + rest >= line_len) {
l_proc = line_len - s_ref.length;
update_aln(s_ref, s_qry, s_mid, m[1], m[1] == '*'? m[2] : m[2].substr(start, l_proc), elen);
if (n_blocks > 0) print("");
print_aln(rs, qs, t[4], slen, elen, s_ref, s_qry, s_mid);
++n_blocks;
s_ref.length = s_qry.length = s_mid.length = 0;
slen[0] = elen[0], slen[1] = elen[1];
} else {
l_proc = rest;
update_aln(s_ref, s_qry, s_mid, m[1], m[1] == '*'? m[2] : m[2].substr(start, l_proc), elen);
}
rest -= l_proc, start += l_proc;
}
}
if (s_ref.length > 0) {
if (n_blocks > 0) print("");
print_aln(rs, qs, t[4], slen, elen, s_ref, s_qry, s_mid);
++n_blocks;
}
print("//");
}
}
file.close();
buf.destroy();
s_ref.destroy(); s_qry.destroy(); s_mid.destroy();

111
misc/sam2paf.js 100644
View File

@ -0,0 +1,111 @@
var getopt = function(args, ostr) {
var oli; // option letter list index
if (typeof(getopt.place) == 'undefined')
getopt.ind = 0, getopt.arg = null, getopt.place = -1;
if (getopt.place == -1) { // update scanning pointer
if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
getopt.place = -1;
return null;
}
if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
++getopt.ind;
getopt.place = -1;
return null;
}
}
var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null.
if (getopt.place < 0) ++getopt.ind;
return '?';
}
if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
getopt.arg = null;
if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
} else { // need an argument
if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
getopt.arg = args[getopt.ind].substr(getopt.place);
else if (args.length <= ++getopt.ind) { // no arg
getopt.place = -1;
if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
return '?';
} else getopt.arg = args[getopt.ind]; // white space
getopt.place = -1;
++getopt.ind;
}
return optopt;
}
var c, pri_only = false;
while ((c = getopt(arguments, "p")) != null)
if (c == 'p') pri_only = true;
var file = arguments.length == getopt.ind? new File() : new File(arguments[getopt.ind]);
var buf = new Bytes();
var re = /(\d+)([MIDSHNX=])/g;
var len = {}, lineno = 0;
while (file.readline(buf) >= 0) {
var m, n_cigar = 0, line = buf.toString();
++lineno;
if (line.charAt(0) == '@') {
if (/^@SQ/.test(line)) {
var name = (m = /\tSN:(\S+)/.exec(line)) != null? m[1] : null;
var l = (m = /\tLN:(\d+)/.exec(line)) != null? parseInt(m[1]) : null;
if (name != null && l != null) len[name] = l;
}
continue;
}
var t = line.split("\t");
var flag = parseInt(t[1]);
if (t[9] != '*' && t[10] != '*' && t[9].length != t[10].length) throw Error("ERROR at line " + lineno + ": inconsistent SEQ and QUAL lengths - " + t[9].length + " != " + t[10].length);
if (t[2] == '*' || (flag&4)) continue;
if (pri_only && (flag&0x100)) continue;
var tlen = len[t[2]];
if (tlen == null) throw Error("ERROR at line " + lineno + ": can't find the length of contig " + t[2]);
var nn = (m = /\tnn:i:(\d+)/.exec(line)) != null? parseInt(m[1]) : 0;
var NM = (m = /\tNM:i:(\d+)/.exec(line)) != null? parseInt(m[1]) : null;
var have_NM = NM == null? false : true;
NM += nn;
var clip = [0, 0], I = [0, 0], D = [0, 0], M = 0, N = 0, ql = 0, tl = 0, mm = 0, ext_cigar = false;
while ((m = re.exec(t[5])) != null) {
var l = parseInt(m[1]);
if (m[2] == 'M') M += l, ql += l, tl += l, ext_cigar = false;
else if (m[2] == 'I') ++I[0], I[1] += l, ql += l;
else if (m[2] == 'D') ++D[0], D[1] += l, tl += l;
else if (m[2] == 'N') N += l, tl += l;
else if (m[2] == 'S') clip[M == 0? 0 : 1] = l, ql += l;
else if (m[2] == 'H') clip[M == 0? 0 : 1] = l;
else if (m[2] == '=') M += l, ql += l, tl += l, ext_cigar = true;
else if (m[2] == 'X') M += l, ql += l, tl += l, mm += l, ext_cigar = true;
++n_cigar;
}
if (n_cigar > 65535)
warn("WARNING at line " + lineno + ": " + n_cigar + " CIGAR operations");
if (tl + parseInt(t[3]) - 1 > tlen) {
warn("WARNING at line " + lineno + ": alignment end position larger than ref length; skipped");
continue;
}
if (t[9] != '*' && t[9].length != ql) {
warn("WARNING at line " + lineno + ": SEQ length inconsistent with CIGAR (" + t[9].length + " != " + ql + "); skipped");
continue;
}
if (!have_NM || ext_cigar) NM = I[1] + D[1] + mm;
if (NM < I[1] + D[1] + mm) {
warn("WARNING at line " + lineno + ": NM is less than the total number of gaps (" + NM + " < " + (I[1]+D[1]+mm) + ")");
NM = I[1] + D[1] + mm;
}
var extra = ["mm:i:"+(NM-I[1]-D[1]), "io:i:"+I[0], "in:i:"+I[1], "do:i:"+D[0], "dn:i:"+D[1]];
var match = M - (NM - I[1] - D[1]);
var blen = M + I[1] + D[1];
var qlen = M + I[1] + clip[0] + clip[1];
var qs, qe;
if (flag&16) qs = clip[1], qe = qlen - clip[0];
else qs = clip[0], qe = qlen - clip[1];
var ts = parseInt(t[3]) - 1, te = ts + M + D[1] + N;
var a = [t[0], qlen, qs, qe, flag&16? '-' : '+', t[2], tlen, ts, te, match, blen, t[4]];
print(a.join("\t"), extra.join("\t"));
}
buf.destroy();
file.close();

View File

@ -36,11 +36,12 @@ var getopt = function(args, ostr) {
return optopt; return optopt;
} }
var c, max_mapq = 60, mode = 0, err_out_q = 256, print_err = false, ovlp_ratio = 0.333; var c, max_mapq = 60, mode = 0, err_out_q = 256, print_err = false, ovlp_ratio = 0.1, cap_short_mapq = false;
while ((c = getopt(arguments, "Q:r:m:")) != null) { while ((c = getopt(arguments, "Q:r:m:c")) != null) {
if (c == 'Q') err_out_q = parseInt(getopt.arg), print_err = true; if (c == 'Q') err_out_q = parseInt(getopt.arg), print_err = true;
else if (c == 'r') ovlp_ratio = parseFloat(getopt.arg); else if (c == 'r') ovlp_ratio = parseFloat(getopt.arg);
else if (c == 'm') mode = parseInt(getopt.arg); else if (c == 'm') mode = parseInt(getopt.arg);
else if (c == 'c') cap_short_mapq = true;
} }
var file = arguments.length == getopt.ind? new File() : new File(arguments[getopt.ind]); var file = arguments.length == getopt.ind? new File() : new File(arguments[getopt.ind]);
@ -68,20 +69,27 @@ function is_correct(s, b)
function count_err(qname, a, tot, err, mode) function count_err(qname, a, tot, err, mode)
{ {
var s = qname.split("!");
if (a.length == 0) return; if (a.length == 0) return;
if (s.length < 5 || (s[4] != '+' && s[4] != '-'))
throw Error("Failed to parse pbsim2fa read names '" + qname + "'"); var m, s;
s[2] = parseInt(s[2]); if ((m = /^(\S+)!(\S+)!(\d+)!(\d+)!([\+\-])$/.exec(qname)) != null) { // pbsim single-end reads
s[3] = parseInt(s[3]); s = [m[1], m[2], parseInt(m[3]), parseInt(m[4]), m[5]];
s.shift(); // skip pbsim orginal read name } else if ((m = /^(\S+)!(\S+)!(\d+)_(\d+)!(\d+)_(\d+)!([\+\-])([\+\-])\/([12])$/.exec(qname)) != null) { // mason2 paired-end reads
if (m[9] == '1') {
s = [m[1], m[2], parseInt(m[3]), parseInt(m[5]), m[7]];
} else {
s = [m[1], m[2], parseInt(m[4]), parseInt(m[6]), m[8]];
}
} else throw Error("Failed to parse simulated read names '" + qname + "'");
s.shift(); // skip the orginal read name
if (mode == 0 || mode == 1) { // longest only or first only if (mode == 0 || mode == 1) { // longest only or first only
var max_i = 0; var max_i = 0;
if (mode == 0) { if (mode == 0) { // longest only
var max = 0; var max = 0;
for (var i = 0; i < a.length; ++i) for (var i = 0; i < a.length; ++i)
if (a[i][2] - a[i][1] > max) if (a[i][5] > max)
max = a[i][2] - a[i][1], max_i = i; max = a[i][5], max_i = i;
} }
var mapq = a[max_i][4]; var mapq = a[max_i][4];
++tot[mapq]; ++tot[mapq];
@ -92,6 +100,14 @@ function count_err(qname, a, tot, err, mode)
} }
} else if (mode == 2) { // all primary mode } else if (mode == 2) { // all primary mode
var max_err_mapq = -1, max_mapq = 0, max_err_i = -1; var max_err_mapq = -1, max_mapq = 0, max_err_i = -1;
if (cap_short_mapq) {
var max = 0, max_q = 0;
for (var i = 0; i < a.length; ++i)
if (a[i][5] > max)
max = a[i][5], max_q = a[i][4];
for (var i = 0; i < a.length; ++i)
a[i][4] = max_q < a[i][4]? max_q : a[i][4];
}
for (var i = 0; i < a.length; ++i) { for (var i = 0; i < a.length; ++i) {
max_mapq = max_mapq > a[i][4]? max_mapq : a[i][4]; max_mapq = max_mapq > a[i][4]? max_mapq : a[i][4];
if (!is_correct(s, a[i])) if (!is_correct(s, a[i]))
@ -106,22 +122,53 @@ function count_err(qname, a, tot, err, mode)
} }
} }
var lineno = 0, last = null, a = []; var lineno = 0, last = null, a = [], n_unmapped = null;
var re_cigar = /(\d+)([MIDSHN])/g;
while (file.readline(buf) >= 0) { while (file.readline(buf) >= 0) {
var line = buf.toString(); var m, line = buf.toString();
++lineno; ++lineno;
if (line[0] != '@') { if (line[0] != '@') {
var t = line.split("\t"); var t = line.split("\t");
if (last != t[0]) {
if (last != null) count_err(last, a, tot, err, mode);
a = [], last = t[0];
}
if (t[4] == '+' || t[4] == '-') { // PAF if (t[4] == '+' || t[4] == '-') { // PAF
if (last != t[0]) {
if (last != null) count_err(last, a, tot, err, mode);
a = [], last = t[0];
}
if (/\ts1:i:\d+/.test(line) && !/\ts2:i:\d+/.test(line)) // secondary alignment in minimap2 PAF if (/\ts1:i:\d+/.test(line) && !/\ts2:i:\d+/.test(line)) // secondary alignment in minimap2 PAF
continue; continue;
var mapq = parseInt(t[11]); var mapq = parseInt(t[11]);
if (mapq > max_mapq) mapq = max_mapq; if (mapq > max_mapq) mapq = max_mapq;
a.push([t[5], parseInt(t[7]), parseInt(t[8]), t[4], mapq]); a.push([t[5], parseInt(t[7]), parseInt(t[8]), t[4], mapq, parseInt(t[9])]);
} else { // SAM
var flag = parseInt(t[1]);
var read_no = flag>>6&0x3;
var qname = read_no == 1 || read_no == 2? t[0] + '/' + read_no : t[0];
if (last != qname) {
if (last != null) count_err(last, a, tot, err, mode);
a = [], last = qname;
}
if (flag&0x100) continue; // secondary alignment
if ((flag&0x4) || t[2] == '*') { // unmapped
if (n_unmapped == null) n_unmapped = 0;
++n_unmapped;
continue;
}
var mapq = parseInt(t[4]);
if (mapq > max_mapq) mapq = max_mapq;
var pos = parseInt(t[3]) - 1, pos_end = pos;
var n_gap = 0, mlen = 0;
while ((m = re_cigar.exec(t[5])) != null) {
var len = parseInt(m[1]);
if (m[2] == 'M') pos_end += len, mlen += len;
else if (m[2] == 'I') n_gap += len;
else if (m[2] == 'D') n_gap += len, pos_end += len;
}
var score = pos_end - pos;
if ((m = /\tNM:i:(\d+)/.exec(line)) != null) {
var NM = parseInt(m[1]);
if (NM >= n_gap) score = mlen - (NM - n_gap);
}
a.push([t[2], pos, pos_end, (flag&16)? '-' : '+', mapq, score]);
} }
} }
} }
@ -141,3 +188,4 @@ for (var q = max_mapq; q >= 0; --q) {
sum_tot2 += tot[q], sum_err2 += err[q]; sum_tot2 += tot[q], sum_err2 += err[q];
} }
print('Q', q_out, sum_tot, sum_err, (sum_err2/sum_tot2).toFixed(9)); print('Q', q_out, sum_tot, sum_err, (sum_err2/sum_tot2).toFixed(9));
if (n_unmapped != null) print('U', n_unmapped);

105
misc/sim-mason2.js 100644
View File

@ -0,0 +1,105 @@
Bytes.prototype.reverse = function()
{
for (var i = 0; i < this.length>>1; ++i) {
var tmp = this[i];
this[i] = this[this.length - i - 1];
this[this.length - i - 1] = tmp;
}
}
// reverse complement a DNA string
Bytes.prototype.revcomp = function()
{
if (Bytes.rctab == null) {
var s1 = 'WSATUGCYRKMBDHVNwsatugcyrkmbdhvn';
var s2 = 'WSTAACGRYMKVHDBNwstaacgrymkvhdbn';
Bytes.rctab = [];
for (var i = 0; i < 256; ++i) Bytes.rctab[i] = 0;
for (var i = 0; i < s1.length; ++i)
Bytes.rctab[s1.charCodeAt(i)] = s2.charCodeAt(i);
}
for (var i = 0; i < this.length>>1; ++i) {
var tmp = this[this.length - i - 1];
this[this.length - i - 1] = Bytes.rctab[this[i]];
this[i] = Bytes.rctab[tmp];
}
if (this.length&1)
this[this.length>>1] = Bytes.rctab[this[this.length>>1]];
}
if (arguments.length == 0) {
print("Usage: k8 sim-mason2.js <mason.sam>");
exit(1);
}
function print_se(a)
{
print('@' + a.slice(0, 5).join("!") + " " + a[8]);
print(a[5]);
print("+");
print(a[6]);
}
var buf = new Bytes(), buf2 = new Bytes();
var file = new File(arguments[0]);
var re = /(\d+)([MIDSHN])/g;
var last = null;
while (file.readline(buf) >= 0) {
var t = buf.toString().split("\t");
if (t[0].charAt(0) == '@') continue;
var m, l_ref = 0;
while ((m = re.exec(t[5])) != null)
if (m[2] == 'D' || m[2] == 'M' || m[2] == 'N')
l_ref += parseInt(m[1]);
var flag = parseInt(t[1]);
var rev = !!(flag&16);
var seq, qual;
if (rev) {
buf2.length = 0;
buf2.set(t[9], 0);
buf2.revcomp();
seq = buf2.toString();
buf2.set(t[10], 0);
buf2.reverse();
qual = buf2.toString();
} else seq = t[9], qual = t[10];
var qname = t[0];
qname = qname.replace(/^simulated./, "");
var chr = t[2];
var pos = parseInt(t[3]) - 1;
var strand = (flag&16)? '-' : '+';
var read_no = flag&0xc0;
if (read_no == 0x40) read_no = 1;
else if (read_no == 0x80) read_no = 2;
else read_no = 0;
var err = 0, snp = 0, indel = 0;
for (var i = 11; i < t.length; ++i) {
if ((m = /^XE:i:(\d+)/.exec(t[i])) != null) err = m[1];
else if ((m = /^XS:i:(\d+)/.exec(t[i])) != null) snp = m[1];
else if ((m = /^XI:i:(\d+)/.exec(t[i])) != null) indel = m[1];
}
var comment = [err, snp, indel].join(":");
if (last == null) {
last = [qname, chr, pos, pos + l_ref, strand, seq, qual, read_no, comment];
} else if (last[0] != qname) {
print_se(last);
last = [qname, chr, pos, pos + l_ref, strand, seq, qual, read_no, comment];
} else {
if (read_no == 2) { // last[] is the first read
if (last[7] != 1) throw Error("ERROR: can't find read1");
var name = [qname, chr, last[2] + "_" + pos, last[3] + "_" + (pos + l_ref), last[4] + strand].join("!");
print('@' + name + '/1' + ' ' + last[8]); print(last[5]); print("+"); print(last[6]);
print('@' + name + '/2' + ' ' + comment); print(seq); print("+"); print(qual);
} else {
if (last[7] != 2) throw Error("ERROR: can't find read2");
var name = [qname, chr, pos + "_" + last[2], (pos + l_ref) + "_" + last[3], strand + last[4]].join("!");
print('@' + name + '/1' + ' ' + comment); print(seq); print("+"); print(qual);
print('@' + name + '/2' + ' ' + last[8]); print(last[5]); print("+"); print(last[6]);
}
last = null;
}
}
if (last != null) print_se(last);
file.close();
buf.destroy();
buf2.destroy();

View File

@ -28,7 +28,7 @@ Bytes.prototype.revcomp = function()
} }
if (arguments.length < 2) { if (arguments.length < 2) {
print("Usage: k8 pbsim2paf.js <chr.list> <pbsim1.maf> [[pbsim2.maf] ...]"); print("Usage: k8 sim-pbsim.js <ref.fa.fai> <pbsim1.maf> [[pbsim2.maf] ...]");
exit(1); exit(1);
} }

View File

@ -40,7 +40,7 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end);
void radix_sort_64(uint64_t *beg, uint64_t *end); void radix_sort_64(uint64_t *beg, uint64_t *end);
uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk); uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r); void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag);
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs); void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs);
int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int64_t n, mm128_t *a, uint64_t **_u, void *km); int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int64_t n, mm128_t *a, uint64_t **_u, void *km);
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a); mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);

21
tex/Makefile 100644
View File

@ -0,0 +1,21 @@
.SUFFIXES: .gp .tex .eps .pdf .eps.gz
.eps.pdf:
epstopdf --outfile $@ $<
.eps.gz.pdf:
gzip -dc $< | epstopdf --filter > $@
.pdf.eps:
pdftops -eps $< $@
all:minimap2.pdf
roc-color.eps:roc.gp
gnuplot roc.gp
minimap2.pdf:minimap2.tex minimap2.bib roc-color.pdf
pdflatex minimap2; bibtex minimap2; pdflatex minimap2; pdflatex minimap2;
clean:
rm -fr *.toc *.aux *.bbl *.blg *.idx *.log *.out *~ minimap2.pdf

930
tex/bioinfo.cls 100644
View File

@ -0,0 +1,930 @@
\newcommand\classname{bioinfo}
\newcommand\lastmodifieddate{2003/02/08}
\newcommand\versionnumber{0.1}
% Are we printing crop marks?
\newif\if@cropmarkson \@cropmarksontrue
\NeedsTeXFormat{LaTeX2e}[2001/06/01]
\ProvidesClass{\classname}[\lastmodifieddate\space\versionnumber]
\setlength{\paperheight}{11truein}
\setlength{\paperwidth}{8.5truein}
\newif\if@final
\DeclareOption{draft}{\PassOptionsToPackage{draft}{graphicx}}
\DeclareOption{a4paper}{\PassOptionsToPackage{a4}{crop}}
\DeclareOption{centre}{\PassOptionsToPackage{center}{crop}}
\DeclareOption{crop}{\PassOptionsToPackage{cam}{crop}\global\@cropmarksontrue}
\DeclareOption{nocrop}{\PassOptionsToPackage{off}{crop}\global\@cropmarksonfalse}
\DeclareOption{info}{\PassOptionsToPackage{info}{crop}}
\DeclareOption{noinfo}{\PassOptionsToPackage{noinfo}{crop}}
\DeclareOption{final}{\global\@finaltrue}
\ExecuteOptions{a4paper,nocrop,centre,info}
\ProcessOptions
% Load all necessary packages
\RequirePackage{inputenc,crop,graphicx,amsmath,array,color,amssymb,flushend,stfloats,amsthm,chngpage,times}
%\RequirePackage[LY1]{fontenc}
%\RequirePackage[LY1,mtbold]{mathtime}
\def\authoraffliate{\fontfamily{phv}\selectfont}
\def\helvetica{\fontfamily{phv}\selectfont}
\def\helveticaitalic{\fontfamily{phv}\itshape\selectfont}
\def\helveticabold{\fontfamily{phv}\bfseries\selectfont}
\def\helveticabolditalic{\fontfamily{phv}\bfseries\itshape\selectfont}
% Not sure if needed.
\newcommand\@ptsize{0}
% Set twoside printing
\@twosidetrue
% Marginal notes are on the outside edge
\@mparswitchfalse
\reversemarginpar
\renewcommand\normalsize{%
\@setfontsize\normalsize{9}{11}%
\abovedisplayskip 10\p@ \@plus2\p@ \@minus5\p@
\abovedisplayshortskip \z@ \@plus3\p@
\belowdisplayshortskip 6\p@ \@plus3\p@ \@minus3\p@
\belowdisplayskip \abovedisplayskip
\let\@listi\@listI}
\normalsize
\let\@bls\baselineskip
\newcommand\small{%
\@setfontsize\small{9}{11}%
\abovedisplayskip 11\p@ minus 3\p@
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip \z@ plus 2\p@
\belowdisplayshortskip 4\p@ plus 2\p@ minus2\p@
\def\@listi{\topsep 4.5\p@ plus 2\p@ minus 1\p@
\itemsep \parsep
\topsep 4\p@ plus 2\p@ minus 2\p@}}
\newcommand\footnotesize{%
\@setfontsize\footnotesize{8}{10}%
\abovedisplayskip 6\p@ minus 3\p@
\belowdisplayskip\abovedisplayskip
\abovedisplayshortskip \z@ plus 3\p@
\belowdisplayshortskip 6\p@ plus 3\p@ minus 3\p@
\def\@listi{\topsep 3\p@ plus 1\p@ minus 1\p@
\parsep 2\p@ plus 1\p@ minus 1\p@\itemsep \parsep}}
\def\scriptsize{\@setfontsize\scriptsize{7pt}{9pt}}
\def\tiny{\@setfontsize\tiny{5pt}{7pt}}
\def\large{\@setfontsize\large{11.5pt}{12pt}}
\def\Large{\@setfontsize\Large{14pt}{16}}
\def\LARGE{\@setfontsize\LARGE{15pt}{17pt}}
\def\huge{\@setfontsize\huge{22pt}{22pt}}
\def\Huge{\@setfontsize\Huge{30pt}{30pt}}
\DeclareOldFontCommand{\rm}{\normalfont\rmfamily}{\mathrm}
\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
\DeclareOldFontCommand{\tt}{\normalfont\ttfamily}{\mathtt}
\DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf}
\DeclareOldFontCommand{\it}{\normalfont\itshape}{\mathit}
\DeclareOldFontCommand{\sl}{\normalfont\slshape}{\@nomath\sl}
\DeclareOldFontCommand{\sc}{\normalfont\scshape}{\@nomath\sc}
% Line spacing
\setlength\lineskip{1\p@}
\setlength\normallineskip{1\p@}
\renewcommand\baselinestretch{}
% Paragraph dimensions and inter-para spacing
\setlength\parskip{0\p@}
\setlength\parindent{3mm}
% Set inter-para skips
\setlength\smallskipamount{3\p@ \@plus 1\p@ \@minus 1\p@}
\setlength\medskipamount{6\p@ \@plus 2\p@}
\setlength\bigskipamount{12\p@ \@plus 4\p@ \@minus 4\p@}
% Page break penalties
\@lowpenalty 51
\@medpenalty 151
\@highpenalty 301
% Disallow widows and orphans
\clubpenalty 10000
\widowpenalty 10000
% Disable page breaks before equations, allow pagebreaks after
% equations and discourage widow lines before equations.
\displaywidowpenalty 100
\predisplaypenalty 10000
\postdisplaypenalty 2500
% Allow breaking the page in the middle of a paragraph
\interlinepenalty 0
% Disallow breaking the page after a hyphenated line
\brokenpenalty 10000
% Hyphenation; don't split words into less than three characters
\lefthyphenmin=3
\righthyphenmin=3
%
% Set page layout dimensions
%
\setlength\headheight{16\p@} % height of running head
\setlength\topmargin{2.9pc} % head margin
\addtolength\topmargin{-1in} % subtract out the 1 inch driver margin
\setlength\topskip{10\p@} % height of first line of text
\setlength\headsep{19\p@} % space below running head --
\setlength\footskip{34\p@} % space above footer line
\setlength\maxdepth{.5\topskip} % pages can be short or deep by half a line?
\setlength\textwidth{42pc} % text measure excluding margins
\setlength\textheight{58\baselineskip} % 54 lines on a full page,
\addtolength\textheight{\topskip} % including the first
% line on the page
% Set the margins
\setlength\marginparsep{3\p@}
\setlength\marginparpush{3\p@}
\setlength\marginparwidth{35\p@}
\setlength\oddsidemargin{4.5pc}
\addtolength\oddsidemargin{-1in} % subtract out the 1 inch driver margin
\setlength\@tempdima{\paperwidth}
\addtolength\@tempdima{-\textwidth}
\addtolength\@tempdima{-4.5pc}
\setlength\evensidemargin{\@tempdima}
\addtolength\evensidemargin{-1in}
\setlength\columnsep{1.5pc} % space between columns for double-column text
\setlength\columnseprule{0\p@} % width of rule between two columns
% Footnotes
\setlength\footnotesep{9\p@} % space between footnotes
% space between text and footnote
\setlength{\skip\footins}{12\p@ \@plus 6\p@ \@minus 1\p@}
% Float placement parameters
% The total number of floats that can be allowed on a page.
\setcounter{totalnumber}{10}
% The maximum number of floats at the top and bottom of a page.
\setcounter{topnumber}{5}
\setcounter{bottomnumber}{5}
% The maximum part of the top or bottom of a text page that can be
% occupied by floats. This is set so that at least four lines of text
% fit on the page.
\renewcommand\topfraction{.9}
\renewcommand\bottomfraction{.9}
% The minimum amount of a text page that must be occupied by text.
% This should accomodate four lines of text.
\renewcommand\textfraction{.06}
% The minimum amount of a float page that must be occupied by floats.
\renewcommand\floatpagefraction{.94}
% The same parameters repeated for double column output
\renewcommand\dbltopfraction{.9}
\renewcommand\dblfloatpagefraction{.9}
% Space between floats
\setlength\floatsep {12\p@ \@plus 2\p@ \@minus 2\p@}
% Space between floats and text
\setlength\textfloatsep{20\p@ \@plus 2\p@ \@minus 4\p@}
% Space above and below an inline figure
\setlength\intextsep {18\p@ \@plus 2\p@ \@minus 2\p@}
% For double column floats
\setlength\dblfloatsep {12\p@ \@plus 2\p@ \@minus 2\p@}
\setlength\dbltextfloatsep{20\p@ \@plus 2\p@ \@minus 4\p@}
% Space left at top, bottom and inbetween floats on a float page.
\setlength\@fptop{0\p@} % no space above float page figures
\setlength\@fpsep{12\p@ \@plus 1fil}
\setlength\@fpbot{0\p@}
% The same for double column
\setlength\@dblfptop{0\p@}
\setlength\@dblfpsep{12\p@ \@plus 1fil}
\setlength\@dblfpbot{0\p@}
% Override settings in mathtime back to TeX defaults
\DeclareMathSizes{5} {5} {5} {5}
\DeclareMathSizes{6} {6} {5} {5}
\DeclareMathSizes{7} {7} {5} {5}
\DeclareMathSizes{8} {8} {6} {5}
\DeclareMathSizes{9} {9} {6.5} {5}
\DeclareMathSizes{10} {10} {7.5} {5}
\DeclareMathSizes{12} {12} {9} {7}
% Page styles
\def\ps@headings
{%
\def\@oddfoot{\vbox to 12.5\p@{\hbox{\rule{\textwidth}{0.5\p@}}\vss
\hbox to \textwidth{\hfill\helveticabold\small\thepage}%
}}%
\def\@evenfoot{\vbox to 12.5\p@{\rule{\textwidth}{0.5\p@}\vss
\hbox to \textwidth{\helveticabold\small\thepage\hfill}%
}}%
\def\@evenhead{\vbox{\hbox to \textwidth{\fontsize{8}{10}\selectfont
\helveticabold{\fontshape{it}\selectfont
\strut\leftmark}\hfill}\vspace{6.5\p@}\rule{\textwidth}{0.5\p@}}}%
\def\@oddhead{\vbox{\hbox to \textwidth{\hfill\fontsize{8}{10}\selectfont
\helveticabold{\fontshape{it}\selectfont\strut\rightmark}}%
\vspace{6.5\p@}\rule{\textwidth}{0.5\p@}}}%
\def\titlemark##1{\markboth{##1}{##1}}%
\def\authormark##1{\gdef\leftmark{##1}}%
}
\def\ps@opening
{%
\def\@oddfoot{\vbox to 13\p@{\hbox{\rule{\textwidth}{1\p@}}\vss
\hbox to \textwidth{\helvetica
\fontsize{7}{9}\fontshape{n}\selectfont%
\hfill\small\helveticabold\thepage}%
}}%
\def\@evenfoot{\vbox to 13\p@{\rule{\textwidth}\vss
\hbox to \textwidth{\helvetica\thepage\hfill
\fontsize{7}{9}\fontshape{n}\selectfont}%
}}%
\let\@evenhead\relax
\let\@oddhead\relax}
% Page range
\newif\iflastpagegiven \lastpagegivenfalse
\newcommand\firstpage[1]{%
\gdef\@firstpage{#1}%
\ifnum\@firstpage>\c@page
\setcounter{page}{#1}%
\ClassWarning{BIO}{Increasing pagenumber to \@firstpage}%
\else \ifnum\@firstpage<\c@page
\ClassWarning{BIO}{Firstpage lower than pagenumber}\fi\fi
\xdef\@firstpage{\the\c@page}%
}
\def\@firstpage{1}
\def\pagenumbering#1{%
\global\c@page \@ne
\gdef\thepage{\csname @#1\endcsname \c@page}%
\gdef\thefirstpage{%
\csname @#1\endcsname \@firstpage}%
\gdef\thelastpage{%
\csname @#1\endcsname \@lastpage}%
}
\newcommand\lastpage[1]{\xdef\@lastpage{#1}%
\global\lastpagegiventrue}
\def\@lastpage{0}
\def\setlastpage{\iflastpagegiven\else
\edef\@tempa{@lastpage@}%
\expandafter
\ifx \csname \@tempa \endcsname \relax
\gdef\@lastpage{0}%
\else
\xdef\@lastpage{\@nameuse{@lastpage@}}%
\fi
\fi }
\def\writelastpage{%
\iflastpagegiven \else
\immediate\write\@auxout%
{\string\global\string\@namedef{@lastpage@}{\the\c@page}}%
\fi
}
\def\thepagerange{%
\ifnum\@lastpage =0 {\ \bf ???} \else
\ifnum\@lastpage = \@firstpage \ \thefirstpage\else
\thefirstpage--\thelastpage \fi\fi}
\AtBeginDocument{\setlastpage
\pagenumbering{arabic}%
}
\AtEndDocument{%
\writelastpage
\if@final
\clearemptydoublepage
\else
\clearpage
\fi}
%
% Sectional units
%
% Counters
\newcounter{section}
\newcounter{subsection}[section]
\newcounter{subsubsection}[subsection]
\newcounter{paragraph}[subsubsection]
\newcounter{subparagraph}[paragraph]
\newcounter{figure}
\newcounter{table}
% Form of the numbers
\newcommand\thepage{\arabic{page}}
\renewcommand\thesection{\arabic{section}}
\renewcommand\thesubsection{{\thesection.\arabic{subsection}}}
\renewcommand\thesubsubsection{{\thesubsection.\arabic{subsubsection}}}
\renewcommand\theparagraph{\thesubsubsection.\arabic{paragraph}}
\renewcommand\thesubparagraph{\theparagraph.\arabic{subparagraph}}
\renewcommand\theequation{\arabic{equation}}
% Form of the words
\newcommand\contentsname{Contents}
\newcommand\listfigurename{List of Figures}
\newcommand\listtablename{List of Tables}
\newcommand\partname{Part}
\newcommand\appendixname{Appendix}
\newcommand\abstractname{Abstract}
\newcommand\refname{References}
\newcommand\bibname{References}
\newcommand\indexname{Index}
\newcommand\figurename{Fig.}
\newcommand\tablename{Table}
% Clearemptydoublepage should really clear the running heads too
\newcommand{\clearemptydoublepage}{\newpage{\pagestyle{empty}\cleardoublepage}}
% Frontmatter, mainmatter and backmatter
\newif\if@mainmatter \@mainmattertrue
\newcommand\frontmatter{%
\clearpage
\@mainmatterfalse
\pagenumbering{roman}}
\newcommand\mainmatter{%
\clearpage
\@mainmattertrue
\pagenumbering{arabic}}
\newcommand\backmatter{%
\clearpage
\@mainmatterfalse}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% TITLE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newlength{\dropfromtop}
\setlength{\dropfromtop}{\z@}
% Application Notes
\newif\if@appnotes
\newcommand{\application}{%
% \setlength{\dropfromtop}{-2.25pc}%
\global\@appnotestrue}
\long\def\title{\@ifnextchar[{\short@title}{\@@title}}
\def\short@title[#1]{\titlemark{#1}\@@@title}
\def\@@title#1{\authormark{#1}\@@@title{#1}}
\long\def\@@@title#1{\gdef\@title{#1}}
\long\def\author{\@ifnextchar[{\short@uthor}{\@uthor}}
\def\short@uthor[#1]{\authormark{#1}\@@author}
\def\@uthor#1{\authormark{#1}\@@author{#1}}
\long\def\@@author#1{\gdef\@author{#1}}
\def\vol#1{\global\def\@vol{#1}}
\def\issue#1{\global\def\@issue{#1}}
\def\address#1{\global\def\@issue{#1}}
\def\history#1{\global\def\@history{#1}}
\def\editor#1{\global\def\@editor{#1}}
\def\pubyear#1{\global\def\@pubyear{#1}}
\def\copyrightyear#1{\global\def\@copyrightyear{#1}}
\def\address#1{\global\def\@address{#1}}
\def\DOI#1{\global\def\@DOI{#1}}
\definecolor{gray}{cmyk}{0, 0, 0, 0.15}
\newlength{\extraspace}
\setlength{\extraspace}{\z@}
\newcommand\maketitle{\par
\begingroup
\renewcommand\thefootnote{\@fnsymbol\c@footnote}%
\def\@makefnmark{\rlap{\@textsuperscript{\normalfont\@thefnmark}}}%
\long\def\@makefntext##1{\parindent 3mm\noindent
% \@textsuperscript{\normalfont\@thefnmark}\raggedright##1}%
\@textsuperscript{\normalfont\@thefnmark}##1}%
\if@twocolumn
\ifnum \col@number=\@ne
\@maketitle
\else
\twocolumn[\@maketitle]%
\fi
\else
\newpage
\global\@topnum\z@ % Prevents figures from going at top of page.
\@maketitle
\fi
\thispagestyle{opening}\@thanks
\endgroup
\setcounter{footnote}{0}%
\global\let\thanks\relax
\global\let\maketitle\relax
\global\let\@maketitle\relax
\global\let\@address\@empty
\global\let\@history\@empty
\global\let\@editor\@empty
\global\let\@thanks\@empty
\global\let\@author\@empty
\global\let\@date\@empty
\global\let\@title\@empty
\global\let\@pubyear\@empty
\global\let\address\relax
\global\let\history\relax
\global\let\editor\relax
\global\let\title\relax
\global\let\author\relax
\global\let\date\relax
\global\let\pubyear\relax
\global\let\@copyrightline\@empty
\global\let\and\relax
\@afterindentfalse\@afterheading
}
\newlength{\aboveskipchk}%for checking oddpage or evenpage top skip
\setlength{\aboveskipchk}{\z@}%
\def\@maketitle{%
\let\footnote\thanks
\clearemptydoublepage
\checkoddpage\ifcpoddpage\setlength{\aboveskipchk}{-3pc}\else\setlength{\aboveskipchk}{-5pc}\fi%for checking oddpage or evenpage top skip%%
\vspace*{\aboveskipchk}%
\vspace{\dropfromtop}%
\hbox to \textwidth{%
{\helvetica\itshape\bfseries\fontsize{19}{12}\selectfont {\color{gray}TECHNICAL REPORT}
\hfil
\if@appnotes APPLICATIONS NOTE\hfil\fi
}%
\enskip \parbox[b]{11.3pc}{%
\helvetica
\flushright\fontsize{8}{10}\fontshape{it}\selectfont
\hfill
}}
\rule{\textwidth}{1\p@}\par%
\helvetica
\hbox to \textwidth{%
\parbox[t]{41pc}{%
\vspace*{1sp}
{\helveticabold\fontsize{16}{21}\selectfont\raggedright \@title \par}%
\vspace{4.5\p@}
{\authoraffliate\fontsize{11}{13}\selectfont\raggedright \@author \par}%
\vspace{4\p@}
{\authoraffliate\fontsize{9}{11}\selectfont\raggedright \@address \par}%
\vspace{4\p@}
%{\helvetica\fontsize{8}{10}\selectfont\raggedright \@history \par}
%\vspace{24\p@}
%{\helvetica\fontsize{10}{12}\selectfont\raggedright \@editor \par}
%\vspace{20\p@}
}%
}
\vspace{4.5\p@}%
\rule{\textwidth}{1\p@}%
\vspace{12\p@ plus 6\p@ minus 6\p@}%
\vspace{\extraspace}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%% Abstract %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\absection}[1]{%
\par\noindent{\bfseries #1}\space\ignorespaces}
\newenvironment{abstract}{%
\begingroup
\let\section\absection
\fontfamily{\sfdefault}\fontsize{8}{11}\sffamily\selectfont
{\fontseries{b}\selectfont ABSTRACT}\par}
{\endgroup\bigskip\@afterheading\@afterindentfalse\vskip 12pt plus 3pt minus 1pt}
% Section macros
% Lowest level heading that takes a number by default
\setcounter{secnumdepth}{3}
\renewcommand{\@seccntformat}[1]{\csname the#1\endcsname\quad}
\def\section{%
\@startsection{section}{1}{\z@}
{-22\p@ plus -3\p@}{3\p@}
{\reset@font\raggedright\helveticabold\fontsize{10}{12}\selectfont\MakeUppercase}}
\def\subsection{%
\@startsection{subsection}{2}{\z@}
{-11\p@ plus -2\p@}{3\p@}
{\reset@font\raggedright\mathversion{bold}\fontseries{b}\fontsize{10}{12}\selectfont}}
\def\subsubsection{%
\@startsection{subsubsection}{3}{\z@}
%{-11\p@ plus -1\p@}{-1em}
{-11\p@ plus -1\p@}{0.001em}
{\reset@font\normalfont\normalsize\itshape}}
\def\textcolon{\text{\rm :}}
\def\paragraph{%
\@startsection{paragraph}{4}{\z@}
{-6\p@}
{-.4em}
{\reset@font\itshape}}
% ********************
% Figures and tables *
% ********************
% Table and array parameters
\setlength\arraycolsep{.5em}
\setlength\tabcolsep{.5em}
\setlength\arrayrulewidth{.5pt}
\setlength\doublerulesep{2.5pt}
\setlength\extrarowheight{\z@}
\renewcommand\arraystretch{1}
\newlength{\abovecaptionskip}
\newlength{\belowcaptionskip}
\setlength{\abovecaptionskip}{13pt}
\setlength{\belowcaptionskip}{10.5pt}
\long\def\@makecaption#1#2{\vspace{\abovecaptionskip}%
\begingroup
\footnotesize
\textbf{#1.}\enskip{#2}\par
\endgroup}
\long\def\@tablecaption#1#2{%
\begingroup
\footnotesize
\textbf{#1.}\enskip{#2\strut\par}
\endgroup\vspace{\belowcaptionskip}}
% Table rules
\def\toprule{\noalign{\ifnum0=`}\fi\hrule \@height 0.5pt \hrule \@height 6pt \@width 0pt \futurelet
\@tempa\@xhline}
\def\midrule{\noalign{\ifnum0=`}\fi \hrule \@height 6.75pt \@width 0pt \hrule \@height 0.5pt
\hrule \@height 6pt \@width 0pt \futurelet \@tempa\@xhline}
\def\botrule{\noalign{\ifnum0=`}\fi \hrule \@height 5.75pt \@width 0pt \hrule \@height 0.5pt \futurelet
\@tempa\@xhline}
\def\hrulefill{\leavevmode\leaders\hrule height .5pt\hfill\kern\z@}
\def\thefigure{\@arabic\c@figure}
\def\fps@figure{tbp}
\def\ftype@figure{1}
\def\ext@figure{lof}
\def\fnum@figure{\figurename~\thefigure}
\def\figure{\@float{figure}}
\let\endfigure\end@float
\@namedef{figure*}{\@dblfloat{figure}}
\@namedef{endfigure*}{\end@dblfloat}
\def\thetable{\@arabic\c@table}
\def\fps@table{tbp}
\def\ftype@table{2}
\def\ext@table{lot}
\def\fnum@table{Table~\thetable}
\def\table{\let\@makecaption\@tablecaption\let\source\tablesource\@float{table}}
\def\endtable{\end@float}
\@namedef{table*}{\let\@makecaption\@tablecaption\@dblfloat{table}}
\@namedef{endtable*}{\end@dblfloat}
\newif\if@rotate \@rotatefalse
\newif\if@rotatecenter \@rotatecenterfalse
\def\rotatecenter{\global\@rotatecentertrue}
\def\rotateendcenter{\global\@rotatecenterfalse}
\def\rotate{\global\@rotatetrue}
\def\endrotate{\global\@rotatefalse}
\newdimen\rotdimen
\def\rotstart#1{\special{ps: gsave currentpoint currentpoint translate
#1 neg exch neg exch translate}}
\def\rotfinish{\special{ps: currentpoint grestore moveto}}
\def\rotl#1{\rotdimen=\ht#1\advance\rotdimen by \dp#1
\hbox to \rotdimen{\vbox to\wd#1{\vskip \wd#1
\rotstart{270 rotate}\box #1\vss}\hss}\rotfinish}
\def\rotr#1{\rotdimen=\ht #1\advance\rotdimen by \dp#1
\hbox to \rotdimen{\vbox to \wd#1{\vskip \wd#1
\rotstart{90 rotate}\box #1\vss}\hss}\rotfinish}
\newdimen\tempdime
\newbox\temptbox
% From ifmtarg.sty
% Copyright Peter Wilson and Donald Arseneau, 2000
\begingroup
\catcode`\Q=3
\long\gdef\@ifmtarg#1{\@xifmtarg#1QQ\@secondoftwo\@firstoftwo\@nil}
\long\gdef\@xifmtarg#1#2Q#3#4#5\@nil{#4}
\long\gdef\@ifnotmtarg#1{\@xifmtarg#1QQ\@firstofone\@gobble\@nil}
\endgroup
\def\tablesize{\@setfontsize\tablesize{8\p@}{10\p@}}
\newenvironment{processtable}[3]{\setbox\temptbox=\hbox{{\tablesize #2}}%
\tempdime\wd\temptbox\@processtable{#1}{#2}{#3}{\tempdime}}
{\relax}
\newcommand{\@processtable}[4]{%
\if@rotate
\setbox4=\vbox to \hsize{\vss\hbox to \textheight{%
\begin{minipage}{#4}%
\@ifmtarg{#1}{}{\caption{#1}}{\tablesize #2}%
\vskip7\p@\noindent
\parbox{#4}{\fontsize{7}{9}\selectfont #3\par}%
\end{minipage}}\vss}%
\rotr{4}
\else
\hbox to \hsize{\hss\begin{minipage}[t]{#4}%
\vskip2.9pt
\@ifmtarg{#1}{}{\caption{#1}}{\tablesize #2}%
\vskip6\p@\noindent
\parbox{#4}{\fontsize{7}{9}\selectfont #3\par}%
\end{minipage}\hss}\fi}%
\newcolumntype{P}[1]{>{\raggedright\let\\\@arraycr\hangindent1em}p{#1}}
% ******************************
% List numbering and lettering *
% ******************************
\def\labelenumi{{\rm\arabic{enumi}.}}
\def\theenumi{\arabic{enumi}}
\def\labelenumii{{\rm\alph{enumii}.}}
\def\theenumii{\alph{enumii}}
\def\p@enumii{\theenumi}
\def\labelenumiii{{\rm(\arabic{enumiii})}}
\def\theenumiii{\roman{enumiii}}
\def\p@enumiii{\theenumi(\theenumii)}
\def\labelenumiv{{\rm(\arabic{enumiv})}}
\def\theenumiv{\Alph{enumiv}}
\def\p@enumiv{\p@enumiii\theenumiii}
\def\labelitemi{{\small$\bullet$}}
\def\labelitemii{{\small$\bullet$}}
\def\labelitemiii{{\small$\bullet$}}
\def\labelitemiv{{\small$\bullet$}}
\def\@listI{\leftmargin\leftmargini \topsep\medskipamount}
\let\@listi\@listI
\@listi
\def\@listii{\topsep\z@\leftmargin\leftmarginii}
\def\@listiii{\leftmargin\leftmarginiii \topsep\z@}
\def\@listiv{\leftmargin\leftmarginiv \topsep\z@}
\def\@listv{\leftmargin\leftmarginv \topsep\z@}
\def\@listvi{\leftmargin\leftmarginvi \topsep\z@}
\setlength{\leftmargini}{3mm}
\setlength{\leftmarginii}{\z@}
\setlength{\leftmarginiii}{\z@}
\setlength{\leftmarginiv}{\z@}
% Changes to the list parameters for enumerate
\def\enumargs{%
\partopsep \z@
\itemsep 3\p@
\parsep \z@
\labelsep 0.5em
\listparindent \parindent
\itemindent \z@
\topsep 11\p@
}
\def\enumerate{%
\@ifnextchar[{\@numerate}{\@numerate[0]}}
\def\@numerate[#1]{%
\ifnum \@enumdepth >3 \@toodeep\else
\advance\@enumdepth \@ne
\edef\@enumctr{enum\romannumeral\the\@enumdepth}
\list{\csname label\@enumctr\endcsname}{%
\enumargs
\setlength{\leftmargin}{\csname leftmargin\romannumeral\the\@enumdepth\endcsname}
\usecounter{\@enumctr}
\settowidth\labelwidth{#1}
\addtolength{\leftmargin}{\labelwidth}
\addtolength{\leftmargin}{\labelsep}
\def\makelabel##1{\hss \llap{##1}}}%
\fi
}
\let\endenumerate\endlist
% Changes to the list parameters for itemize
\def\itemargs{%
\partopsep \z@
\itemsep 3\p@
\parsep \z@
\labelsep 0.5em
\rightmargin \z@
\listparindent \parindent
\itemindent \z@
\topsep11\p@
}
\def\itemize{%
\@ifnextchar[{\@itemize}{\@itemize[$\bullet$]}}
\def\@itemize[#1]{%
\ifnum \@itemdepth >3 \@toodeep\else
\advance\@itemdepth \@ne
\edef\@itemctr{item\romannumeral\the\@itemdepth}
\list{\csname label\@itemctr\endcsname}{%
\itemargs
\setlength{\leftmargin}{\csname leftmargin\romannumeral\the\@itemdepth\endcsname}
\settowidth\labelwidth{#1}
\addtolength{\leftmargin}{\labelwidth}
\addtolength{\leftmargin}{\labelsep}
\def\makelabel##1{\hss \llap{##1}}}%
\fi
}
\let\enditemize\endlist
\newenvironment{unlist}{%
\begin{list}{}%
{\setlength{\labelwidth}{\z@}%
\setlength{\labelsep}{\z@}%
\setlength{\topsep}{\medskipamount}%
\setlength{\itemsep}{3\p@}%
\setlength{\leftmargin}{2em}%
\setlength{\itemindent}{-2em}}}
{\end{list}}
% ***********************
% Quotes and Quotations *
% ***********************
\def\quotation{\par\begin{list}{}{
\setlength{\topsep}{\medskipamount}
\setlength{\leftmargin}{2em}%
\setlength{\rightmargin}{\z@}%
\setlength\labelwidth{0pt}%
\setlength\labelsep{0pt}%
\listparindent\parindent}%
\item[]}
\def\endquotation{\end{list}}
\let\quote\quotation
\let\endquote\endquotation
\skip\@mpfootins = \skip\footins
\fboxsep=6\p@
\fboxrule=1\p@
% *******************
% Table of contents *
% *******************
\newcommand\@pnumwidth{4em}
\newcommand\@tocrmarg{2.55em plus 1fil}
\newcommand\@dotsep{1000}
\setcounter{tocdepth}{4}
\def\numberline#1{\hbox to \@tempdima{{#1}}}
\def\@authortocline#1#2#3#4#5{%
\vskip 1.5\p@
\ifnum #1>\c@tocdepth \else
{\leftskip #2\relax \rightskip \@tocrmarg \parfillskip -\rightskip
\parindent #2\relax\@afterindenttrue
\interlinepenalty\@M
\leavevmode
\@tempdima #3\relax
\advance\leftskip \@tempdima \null\nobreak\hskip -\leftskip
{\itshape #4}\nobreak
\leaders\hbox{$\m@th
\mkern \@dotsep mu\hbox{.}\mkern \@dotsep
mu$}\hfill
\nobreak
\hb@xt@\@pnumwidth{\hfil}%
\par}%
\fi}
\newcommand*\l@author{\@authortocline{2}{0pt}{30pt}}
\newcommand*\l@section{\@dottedtocline{3}{11pt}{20pt}}
\newcommand*\l@subsection{\@dottedtocline{4}{31pt}{29pt}}
\newcommand*\l@subsubsection[2]{}
% ***********
% Footnotes *
% ***********
\def\footnoterule{\noindent\rule{\columnwidth}{0.5pt}}
\def\@makefnmark{\@textsuperscript{\normalfont\@thefnmark}}%
\newcommand\@makefntext[1]{\noindent{\@makefnmark}\enskip#1}
% ***********
% References *
% ***********
\providecommand{\newblock}{}
\newenvironment{thebibliography}{%
\section{\bibname}%
\begingroup
\small
\begin{list}{}{%
\setlength{\topsep}{\z@}%
\setlength{\labelsep}{\z@}%
\settowidth{\labelwidth}{\z@}%
\setlength{\leftmargin}{4mm}%
\setlength{\itemindent}{-4mm}}\small}
{\end{list}\endgroup}
\RequirePackage{natbib}
% **********
% Appendix *
% **********
\newif\ifappend % Are we in the Appendix?
\def\appendix{\par
\setcounter{section}{0}
\setcounter{subsection}{0}
\appendtrue
}
%Math parameters
\setlength{\jot}{5\p@}
\mathchardef\@m=1500 % adapted value
\def\frenchspacing{\sfcode`\.\@m \sfcode`\?\@m \sfcode`\!\@m
\sfcode`\:\@m \sfcode`\;\@m \sfcode`\,\@m}
% Theorems
\def\th@plain{%
%% \let\thm@indent\noindent % no indent
\thm@headfont{\quad\scshape}% heading font is bold
\thm@notefont{\upshape\mdseries}% same as heading font
\thm@headpunct{.}% no period after heading
\thm@headsep 5\p@ plus\p@ minus\p@\relax
%% \let\thm@swap\@gobble
%% \thm@preskip\topsep
%% \thm@postskip\theorempreskipamount
\itshape % body font
}
\vbadness=9999
\tolerance=9999
\doublehyphendemerits=10000
\doublehyphendemerits 640000 % corresponds to badness 800
\finalhyphendemerits 1000000 % corresponds to badness 1000
\flushbottom
\frenchspacing
\ps@headings
\twocolumn
% Screen PDF compatability
\newcommand{\medline}[1]{%
\unskip\unskip\ignorespaces}
%%%%for smaller size text
\newenvironment{methods}{%
\begingroup
\def\section{%
\@startsection{section}{1}{\z@}
{-24\p@ plus -3\p@}{4\p@}
{\reset@font\raggedright\helveticabold\fontsize{10}{12}\selectfont\MakeUppercase}}
\def\subsection{%
\@startsection{subsection}{2}{\z@}
{-5\p@ plus -2\p@}{4\p@}
{\reset@font\raggedright\mathversion{bold}\fontseries{b}\fontsize{10}{12}\selectfont}}
\def\subsubsection{%
\@startsection{subsubsection}{3}{\z@}
% {-6\p@ plus -1\p@}{-1em}
{-6\p@ plus -1\p@}{0.001em}
{\reset@font\normalfont\normalsize\itshape}}
\footnotesize
\par}
{\par\endgroup\bigskip\@afterheading\@afterindentfalse}
\graphicspath{{g:/artwork/oup/bioinfo/}}
\language=2
\hyphenation{Figure Table Figures Tables}
\newcommand{\href}[2]{#2}
\renewenvironment{proof}[1][\proofname]{\par
\normalfont \topsep6\p@\@plus6\p@\relax
\labelsep 0.5em
\trivlist
\item[\hskip\labelsep\hskip1em\textsc{#1}.]\ignorespaces
}{\endtrivlist\@endpefalse}
%%Different Bonds
\def\sbond{\ensuremath{\raise.25ex\hbox{${-}\!\!\!\!{-}$}}\kern -.9pt}
\def\dbond{\ensuremath{\raise.25ex\hbox{=$\!$=}}}
\def\tbond{\ensuremath{\raise.20ex\hbox{${\equiv}\!\!\!{\equiv}$}}}
% Author queries
%\fboxsep=4\p@
%\fboxrule=0.5\p@
\newcommand{\query}[2][0pt]{}%
% \marginpar{\vspace*{#1}%
% {\parbox{\marginparwidth}{%
% \raggedright\fontsize{6}{8}\selectfont
% #2}}}}
\renewcommand{\dag}{{\mathversion{normal}$^{\dagger}$}}
\endinput

17
tex/blasr-mc.eval 100644
View File

@ -0,0 +1,17 @@
Q 60 32681 57 0.001744133
Q 39 3 1 0.001774569
Q 38 3 1 0.001804999
Q 35 5 1 0.001835311
Q 34 31 2 0.001894692
Q 20 11 2 0.001955154
Q 19 4 1 0.001985460
Q 15 29 5 0.002136296
Q 14 6 1 0.002166417
Q 10 11 1 0.002196193
Q 6 11 2 0.002256442
Q 5 1 1 0.002286864
Q 4 1 1 0.002317285
Q 3 36 15 0.002771602
Q 2 5 2 0.002832085
Q 1 12 9 0.003105023
Q 0 220 83 0.005594194

55
tex/bwa.eval 100644
View File

@ -0,0 +1,55 @@
Q 60 31721 27 0.000851171
Q 59 54 4 0.000975610
Q 58 29 5 0.001131933
Q 57 21 2 0.001194030
Q 56 14 4 0.001319137
Q 55 22 6 0.001506544
Q 54 12 4 0.001631475
Q 53 16 3 0.001724733
Q 51 10 1 0.001755541
Q 50 10 1 0.001786330
Q 49 11 3 0.001879699
Q 47 8 2 0.001941869
Q 46 17 1 0.001972140
Q 44 8 3 0.002065534
Q 43 10 1 0.002096174
Q 42 13 1 0.002126595
Q 41 14 3 0.002219444
Q 40 13 2 0.002281036
Q 38 17 4 0.002404747
Q 37 15 4 0.002528484
Q 36 12 1 0.002558742
Q 35 19 3 0.002650783
Q 34 12 3 0.002743313
Q 33 7 1 0.002773882
Q 32 21 3 0.002865508
Q 31 11 2 0.002926799
Q 30 14 3 0.003018891
Q 29 17 1 0.003048401
Q 28 11 2 0.003109549
Q 27 20 5 0.003262998
Q 26 11 1 0.003292948
Q 25 14 4 0.003415725
Q 24 16 5 0.003569212
Q 23 43 6 0.003750426
Q 21 15 1 0.003779664
Q 20 29 7 0.003992943
Q 19 22 2 0.004052089
Q 18 28 4 0.004172204
Q 16 25 5 0.004323390
Q 15 24 5 0.004474480
Q 14 25 5 0.004625204
Q 13 23 3 0.004714365
Q 12 22 1 0.004741963
Q 11 32 11 0.005075674
Q 10 35 7 0.005285315
Q 9 32 12 0.005648503
Q 8 33 8 0.005888126
Q 7 39 7 0.006095506
Q 6 42 14 0.006515953
Q 5 38 15 0.006966725
Q 4 37 12 0.007325113
Q 3 49 18 0.007862737
Q 2 63 21 0.008486434
Q 1 55 27 0.009292156
Q 0 153 77 0.011576593

33
tex/eval2roc.pl 100755
View File

@ -0,0 +1,33 @@
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
my %opts = (n=>33088, s=>100);
getopts('n:', \%opts);
my $pseudo = .5;
my $tot = $pseudo;
my $err = $pseudo;
my $tot_last_out = -$opts{s};
my $state = 0;
my $mapq = 0;
while (<>) {
chomp;
if (/^Q\t(\d+)\t(\d+)\t(\d+)/) {
$tot += $2;
$err += $3;
if ($tot - $tot_last_out >= $opts{s}) {
print join("\t", $1, $err/$tot, $tot / $opts{n}), "\n";
$tot_last_out = $tot;
$state = 0;
} else {
$state = 1;
$mapq = $1;
}
}
}
if ($state) {
print join("\t", $mapq, $err/$tot, $tot / $opts{n}), "\n";
}

View File

@ -0,0 +1,4 @@
Q 40 31897 63 0.001975107
Q 3 423 267 0.010210396
Q 2 162 120 0.013853827
Q 1 188 172 0.019038874

4
tex/hs38-simu.sh 100644
View File

@ -0,0 +1,4 @@
./pbsim --prefix pb-1 --depth 0.1 --sample-fastq m131017_060208_42213_c100579642550000001823095604021496_s1_p0.1.subreads.fastq --length-min 1000 --length-max 30000 --seed 11 hs38.fa
bin/mason_variator -ir hs38.fa -s 1 -ov hs38-s1.vcf --snp-rate 1e-3 --small-indel-rate 2e-4 --sv-indel-rate 0 --sv-inversion-rate 0 --sv-translocation-rate 0 --sv-duplication-rate 0 --max-small-indel-size 10
bin/mason_simulator -ir hs38.fa -iv hs38-s1.vcf -n 1000000 --seed 1 -o s1_1.fq -or s1_2.fq -oa s1.sam --illumina-prob-mismatch-scale 2.5

49
tex/minialign.eval 100644
View File

@ -0,0 +1,49 @@
Q 60 32070 190 0.005924540
Q 59 62 2 0.005975352
Q 58 37 5 0.006123908
Q 57 40 7 0.006333633
Q 56 39 6 0.006512032
Q 55 32 2 0.006567534
Q 54 54 2 0.006618420
Q 53 33 4 0.006735255
Q 52 39 2 0.006788866
Q 51 48 3 0.006871264
Q 50 34 2 0.006925634
Q 49 32 3 0.007011070
Q 48 35 2 0.007064967
Q 47 36 4 0.007179896
Q 46 23 1 0.007205495
Q 45 25 1 0.007230614
Q 44 17 3 0.007318716
Q 43 17 2 0.007376121
Q 42 31 5 0.007522016
Q 41 25 4 0.007638486
Q 40 26 4 0.007754541
Q 39 35 2 0.007807258
Q 37 18 4 0.007924896
Q 36 13 3 0.008013162
Q 35 15 2 0.008070411
Q 34 20 3 0.008156805
Q 33 11 1 0.008184501
Q 32 15 3 0.008272003
Q 31 25 1 0.008296107
Q 29 8 1 0.008324472
Q 28 7 2 0.008383452
Q 27 9 2 0.008441894
Q 26 30 2 0.008494888
Q 23 2 1 0.008524710
Q 22 11 3 0.008612846
Q 20 23 3 0.008697760
Q 19 6 1 0.008726479
Q 18 8 1 0.008754658
Q 16 6 1 0.008783354
Q 13 2 1 0.008813108
Q 12 4 2 0.008872604
Q 11 7 2 0.008931275
Q 10 4 3 0.009021009
Q 9 6 4 0.009140436
Q 8 6 3 0.009229559
Q 7 5 1 0.009258419
Q 6 8 3 0.009346925
Q 4 8 5 0.009495872
Q 3 17 8 0.009732801

181
tex/minimap2.bib 100644
View File

@ -0,0 +1,181 @@
@article{Chaisson:2012aa,
Author = {Chaisson, Mark J and Tesler, Glenn},
Journal = {BMC Bioinformatics},
Pages = {238},
Title = {{Mapping single molecule sequencing reads using basic local alignment with successive refinement (BLASR): application and theory}},
Volume = {13},
Year = {2012}}
@article{Liu:2016ab,
Author = {Liu, Bo and others},
Journal = {Bioinformatics},
Pages = {1625-31},
Title = {{rHAT}: fast alignment of noisy long reads with regional hashing},
Volume = {32},
Year = {2016}}
@article{Liu:2017aa,
Author = {Liu, Bo and others},
Journal = {Bioinformatics},
Pages = {192-201},
Title = {{LAMSA}: fast split read alignment with long approximate matches},
Volume = {33},
Year = {2017}}
@article{Lin:2017aa,
Author = {Lin, Hsin-Nan and Hsu, Wen-Lian},
Journal = {Bioinformatics},
Title = {Kart: a divide-and-conquer algorithm for {NGS} read alignment},
Year = {2017}}
@article{Li:2013aa,
Author = {Li, Heng},
Journal = {arXiv:1303.3997},
Title = {Aligning sequence reads, clone sequences and assembly contigs with {BWA-MEM}},
archivePrefix = "arXiv",
eprint = {1303.3997},
primaryClass = "q-bio",
Year = {2013}}
@article{Sovic:2016aa,
Author = {Sovi{\'c}, Ivan and others},
Journal = {Nat Commun},
Pages = {11307},
Title = {Fast and sensitive mapping of nanopore sequencing reads with {GraphMap}},
Volume = {7},
Year = {2016}}
@article{Langmead:2012fk,
Author = {Langmead, Ben and Salzberg, Steven L},
Journal = {Nat Methods},
Pages = {357-9},
Title = {Fast gapped-read alignment with {Bowtie} 2},
Volume = {9},
Year = {2012}}
@article{Li:2016aa,
Author = {Li, Heng},
Journal = {Bioinformatics},
Pages = {2103-10},
Title = {Minimap and miniasm: fast mapping and de novo assembly for noisy long sequences},
Volume = {32},
Year = {2016}}
@misc{Suzuki:2016,
title = {Fast and accurate alignment tool for PacBio and Nanopore long reads},
author = {Hajime Suzuki},
journal = {Unpublished},
howpublished = {\href{https://github.com/ocxtal/minialign}{https://github.com/ocxtal/minialign}},
year = {2016}}
@misc{Ruan:2016,
title = {Ultra-fast de novo assembler using long noisy reads},
author = {Jue Ruan},
journal = {Unpulished},
howpublished = {\href{https://github.com/ruanjue/smartdenovo}{https://github.com/ruanjue/smartdenovo}},
year = {2016}}
@article{Miller:1988aa,
Author = {Miller, W and Myers, E W},
Journal = {Bull Math Biol},
Number = {2},
Pages = {97-120},
Title = {Sequence comparison with concave weighting functions},
Volume = {50},
Year = {1988}}
@article{Gotoh:1990aa,
Author = {Gotoh, O},
Journal = {Bull Math Biol},
Pages = {359-73},
Title = {Optimal sequence alignment allowing for long gaps},
Volume = {52},
Year = {1990}}
@article{Wu:1996aa,
Author = {Wu, Sun and others},
Journal = {Algorithmica},
Pages = {50-67},
Title = {A subquadratic algorithm for approximate limited expression matching},
Volume = {15},
Year = {1996}}
@article{Daily:2016aa,
Author = {Daily, Jeff},
Journal = {BMC Bioinformatics},
Month = {Feb},
Pages = {81},
Title = {Parasail: {SIMD C} library for global, semi-global, and local pairwise sequence alignments},
Volume = {17},
Year = {2016}}
@article{Sedlazeck169557,
author = {Sedlazeck, Fritz J and others},
title = {Accurate detection of complex structural variations using single molecule sequencing},
note = {doi:10.1101/169557},
journal = {bioRxiv},
year = {2017}}
@article{Altschul:1997vn,
Author = {Altschul, S F and others},
Journal = {Nucleic Acids Res},
Pages = {3389-402},
Title = {Gapped {BLAST} and {PSI-BLAST}: a new generation of protein database search programs},
Volume = {25},
Year = {1997}}
@article{Sosic:2017aa,
Author = {{\v S}o{\v s}i\'{c}, Martin and {\v S}ikic, Mile},
Journal = {Bioinformatics},
Pages = {1394-1395},
Title = {Edlib: a {C/C++} library for fast, exact sequence alignment using edit distance},
Volume = {33},
Year = {2017}}
@article{Abouelhoda:2005aa,
Author = {Mohamed Ibrahim Abouelhoda and Enno Ohlebusch},
Journal = {J. Discrete Algorithms},
Pages = {321-41},
Title = {Chaining algorithms for multiple genome comparison},
Volume = {3},
Year = {2005}}
@article{Ono:2013aa,
Author = {Ono, Yukiteru and others},
Journal = {Bioinformatics},
Pages = {119-21},
Title = {{PBSIM}: {PacBio} reads simulator--toward accurate genome assembly},
Volume = {29},
Year = {2013}}
@article {Jain128835,
author = {Jain, Miten and others},
title = {Nanopore sequencing and assembly of a human genome with ultra-long reads},
year = {2017},
note = {doi:10.1101/128835},
publisher = {Cold Spring Harbor Labs Journals},
journal = {bioRxiv}}
@article{Lau:2016aa,
Author = {Lau, Bayo and others},
Journal = {Bioinformatics},
Pages = {3829-3832},
Title = {{LongISLND}: in silico sequencing of lengthy and noisy datatypes},
Volume = {32},
Year = {2016}}
@article{Robinson:2011aa,
Author = {Robinson, James T and others},
Journal = {Nat Biotechnol},
Pages = {24-6},
Title = {Integrative genomics viewer},
Volume = {29},
Year = {2011}}
@article {Suzuki130633,
author = {Suzuki, Hajime and Kasahara, Masahiro},
title = {Acceleration Of Nucleotide Semi-Global Alignment With Adaptive Banded Dynamic Programming},
year = {2017},
note = {doi:10.1101/130633},
publisher = {Cold Spring Harbor Labs Journals},
journal = {bioRxiv}}

344
tex/minimap2.tex 100644
View File

@ -0,0 +1,344 @@
\documentclass{bioinfo}
\copyrightyear{2017}
\pubyear{2017}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{url}
\usepackage{amsmath}
\usepackage[ruled,vlined]{algorithm2e}
\newcommand\mycommfont[1]{\footnotesize\rmfamily{\it #1}}
\SetCommentSty{mycommfont}
\SetKwComment{Comment}{$\triangleright$\ }{}
\usepackage{natbib}
\bibliographystyle{apalike}
\usepackage{hyperref}
\DeclareMathOperator*{\argmax}{argmax}
\begin{document}
\firstpage{1}
\title[Long DNA sequence alignment with minimap2]{Minimap2: fast pairwise alignment for long DNA sequences}
\author[Li]{Heng Li}
\address{Broad Institute, 415 Main Street, Cambridge, MA 02142, USA}
\maketitle
\begin{abstract}
\section{Summary:} Minimap2 is a general-purpose mapper to align long noisy DNA
sequences against a large reference database. It targets query sequences of
1kb--100Mb in length with per-base divergence typically below 25\%. Minimap2 is
$\sim$30 times faster than many mainstream long-read aligners and achieves
higher accuracy on simulated data. It also employs concave gap cost and rescues
inversions for improved alignment around potential structural variations.
\section{Availability and implementation:}
\href{https://github.com/lh3/minimap2}{https://github.com/lh3/minimap2}
\section{Contact:} hengli@broadinstitute.org
\end{abstract}
\section{Introduction}
Single Molecule Real-Time (SMRT) sequencing technology and Oxford Nanopore
technologies (ONT) produce reads over 10kbp in length at an error rate
$\sim$15\%. Several aligners have been developed for such
data~\citep{Chaisson:2012aa,Li:2013aa,Liu:2016ab,Sovic:2016aa,Liu:2017aa,Lin:2017aa,Sedlazeck169557}.
They are usually five times as slow as mainstream short-read
aligners~\citep{Langmead:2012fk,Li:2013aa}. We speculated there could be
substantial room for speedup on the thought that 10kb long sequences should be
easier to map than 100bp reads because we can more effectively skip repetitive
regions, which are often the bottleneck of short-read alignment. We confirmed
our speculation by achieving approximate mapping 50 times faster than
BWA-MEM~\citep{Li:2016aa}. \citet{Suzuki:2016} extended our work with a fast
and novel algorithm on generating detailed alignment, which in turn inspired us
to develop minimap2 towards higher accuracy and more practical functionality.
\begin{methods}
\section{Methods}
Minimap2 is the successor of minimap~\citep{Li:2016aa}. It uses similar
indexing and seeding algorithms, and furthers it with more accurate chaining
and the ability to produce detailed alignment.
\subsection{Chaining}
%\subsubsection{Chaining}
An \emph{anchor} is a 3-tuple $(x,y,w)$, indicating interval $[x-w+1,x]$ on the
reference matching interval $[y-w+1,y]$ on the query. Given a list of anchors
sorted by ending reference position $x$, let $f(i)$ be the maximal chaining
score up to the $i$-th anchor in the list. $f(i)$ can be calculated with
dynamic programming (DP):
\begin{equation}\label{eq:chain}
f(i)=\max\big\{\max_{i>j\ge 1} \{ f(j)+d(j,i)-\gamma(j,i) \},w_i\big\}
\end{equation}
where $d(j,i)=\min\big\{\min\{y_i-y_j,x_i-x_j\},w_i\big\}$ is the number of
matching bases between the two anchors. $\gamma(j,i)>0$ is the gap cost. It
equals $\infty$ if $y_j\ge y_i$ or $\max\{y_i-y_j,x_i-x_j\}>G$ (i.e. the
distance between two anchors is too large); otherwise
\[
\gamma(j,i)=\gamma'(\max\{y_i-y_j,x_i-x_j\}-\min\{y_i-y_j,x_i-x_j\})
\]
In implementation, a gap of length $l$ costs $\gamma'(l)=\alpha\cdot
l+\beta\log_2(l)$. For $m$ anchors, directly computing all $f(\cdot)$ with
Eq.~(\ref{eq:chain}) takes $O(m^2)$ time. Although theoretically faster
chaining algorithms exist~\citep{Abouelhoda:2005aa}, they
are inapplicable to generic gap cost, complex to implement and usually
associated with a large constant. We introduced a simple heuristic to
accelerate chaining.
We note that if anchor $i$ is chained to $j$, chaining $i$ to a predecessor
of $j$ is likely to yield a lower score. When evaluating Eq.~(\ref{eq:chain}),
we start from anchor $i-1$ and stop the evaluation if we cannot find a better
score after up to $h$ iterations. This approach reduces the average time to
$O(h\cdot m)$. In practice, we can almost always find the optimal chain with
$h=50$; even if the heuristic fails, the optimal chain is often close.
%\subsubsection{Backtracking}
For backtracking, let $P(i)$ be the index of the best predecessor of anchor
$i$. It equals 0 if $f(i)=w_i$ or $\argmax_j\{f(j)+\eta(j,i)-\gamma(j,i)\}$
otherwise. For each anchor $i$ in the descending order of $f(i)$, we apply
$P(\cdot)$ repeatedly to find its predecessor and mark each visited $i$ as
`used', until $P(i)=0$ or we reach an already `used' $i$. This way we find all
chains with no anchors used in more than one chains.
%\subsubsection{Identifying primary chains}
In the absence of copy number changes, each query segment should not be mapped
to two places in the reference. However, chains found at the previous step may
have significant or complete overlaps due to repeats in the reference.
Minimap2 used the following procedure to identify \emph{primary chains} that do
not greatly overlap on the query. Let $Q$ be an empty set initially. For each
chain from the best to the worst according to their chaining scores: if on the
query, the chain overlaps with a chain in $Q$ by 50\% or higher percentage of
the shorter chain, mark the chain as secondary to the chain in $Q$; otherwise,
add the chain to $Q$. In the end, $Q$ contains all the primary chains. We did
not choose a more sophisticated data structure (e.g. range tree or k-d tree)
because this step is not the performance bottleneck.
\subsection{Alignment}
Minimap2 performs global alignment between adjacent anchors in a chain. It
adopted difference-based formulation to derive
alignment~\citep{Wu:1996aa,Suzuki:2016}. When combined with SSE vectorization,
this formulation has two advantages. First, because each score in the DP matrix
is bounded by the gap cost and the maximum matching score, we can usually
achieve 16-way SSE vectorization regardless of the peak score of the
alignment. Second, filling the DP matrix along the diagonal, we can simplify
banded alignment, which is critical to performance. In practice, our
implementation is three times as fast as Parasail's 4-way
vectorization~\citep{Daily:2016aa} for global alignment.
Without banding, our implementation is slower than Edlib~\citep{Sosic:2017aa},
but with a 1000bp band, it is considerably faster. When performing global
alignment between anchors, we expect the alignment to stay close to the
diagonal of the DP matrix. Banding is often applicable.
Minimap2 uses a 2-piece affine gap cost
$\gamma(l)=\min\{q+l\cdot e,\tilde{q}+l\cdot\tilde{e}\}$.
On the condition that $q+e<\tilde{q}+\tilde{e}$ and $e>\tilde{e}$, this
cost function is concave. It applies cost $q+l\cdot e$ to gaps shorter than
$\lceil(\tilde{q}-q)/(e-\tilde{e})\rceil$ and applies
$\tilde{q}+l\cdot\tilde{e}$ to longer gaps. This scheme helps to recover
longer insertions and deletions~(INDEL; \citealp{Gotoh:1990aa}).
With global alignment, minimap2 may force to align unrelated sequences between
two adjacent anchors. To avoid such an artifact, we compute accumulative
alignment score along the alignment path and break the alignment where the
score drops too fast in the diagonal direction. More precisely, let $S(i,j)$ be
the alignment score along the alignment path ending at cell $(i,j)$ in the DP
matrix. We break the alignment if there exist $(i',j')$ and $(i,j)$, $i'<i$ and
$j'<j$, such that
\[
S(i',j')-S(i,j)>Z+e\cdot(\max\{i-i',j-j'\}-\min\{i-i',j-j'\})
\]
where $e$ is the gap extension cost and $Z$ is an arbitrary threshold.
This strategy is similar to X-drop employed in BLAST~\citep{Altschul:1997vn}.
However, unlike X-drop, it would not break the alignment in the presence of a
single long gap.
When minimap2 breaks a global alignment between two anchors, it performs local
alignment between the two subsequences involved in the global alignment, but
this time with the one subsequence reverse complemented. This additional
alignment step may identify short inversions that are missed during chaining.
\end{methods}
\section{Results}
\begin{figure}[!tb]
\centering
\includegraphics[width=.5\textwidth]{roc-color.pdf}
\caption{Evaluation on simulated SMRT reads aligned against human genome
GRCh38. (a) ROC-like curve. (b) Accumulative mapping error rate as a function
of mapping quality. 33,088 $\ge$1000bp reads were simulated using
pbsim~\citep{Ono:2013aa} with error profile sampled from file
`m131017\_060208\_42213\_*.1.*' downloaded at
\href{http://bit.ly/chm1p5c3}{http://bit.ly/chm1p5c3}. The N50 read length is
11,628. A read is considered correctly mapped if the true position overlaps
with the best mapping position by 10\% of the read length. All aligners were
run under the default setting for SMRT reads.}\label{fig:eval}
\end{figure}
As a sanity check, we evaluated minimap2 on simulated human reads along with
BLASR~\citep{Chaisson:2012aa},
BWA-MEM~\citep{Li:2013aa},
GraphMap~\citep{Sovic:2016aa},
minialign~\citep{Suzuki:2016} and
NGMLR~\citep{Sedlazeck169557}. We excluded rHAT~\citep{Liu:2016ab},
LAMSA~\citep{Liu:2017aa} and Kart~\citep{Lin:2017aa} because they either
crashed or produced malformatted output. In this evaluation, Minimap2 has
higher power to distinguish unique and repetitive hits, and achieves overall
higher mapping accuracy (Fig.~\ref{fig:eval}a). It is still the most accurate
even if we skip DP-based alignment (data not shown), suggesting chaining alone
is sufficient to achieve high accuracy for approximate mapping. Minimap2 and
NGMLR provide better mapping quality estimate: they rarely give repetitive hits
high mapping quality (Fig.~\ref{fig:eval}b). Apparently, other aligners may
occasionally miss close suboptimal hits and be overconfident in wrong mappings.
On run time, minialign is slightly faster than minimap2. They are over 30 times
faster than the rest. Minimap2 consumed 6.1GB memory at the peak, more than
BWA-MEM but less than others.
On real human SMRT reads, the relative performance and sensitivity of
these aligners are broadly similar to those on simulated data. We are unable to
provide a good estimate of mapping error rate due to the lack of the truth. On
ONT ultra-long human reads~\citep{Jain128835}, BWA-MEM failed. Minialign and
minimap2 are over 70 times faster than others. We have also examined tens of
$\ge$100bp INDELs in IGV~\citep{Robinson:2011aa} and can confirm the
observation by~\citet{Sedlazeck169557} that BWA-MEM often breaks them into
shorter gaps. Minimap2 does not have this issue.
\section{Discussions}
Minialign and minimap2 are fast because a) with chaining, they can quickly
filter out most false seed hits~\citep{Li:2016aa} and reduce unsuccessful but
costly DP-based alignments; b) they implemented so far the fastest DP-based
alignment algorithm for long sequences~\citep{Suzuki:2016}. It is possible to
further accelerate minimap2 with a few other tweaks such as adaptive
banding~\citep{Suzuki130633} or incremental banding.
In addition to reference-based read mapping, minimap2 inherits minimap's
ability to search against huge multi-species databases and to find read
overlaps. On a few test data sets, minimap2 appears to yield slightly better
miniasm assembly. Minimap2 can also align closely related genomes, though it
would benefit from more thorough evaluations. Genome alignment is an intricate
topic.
\section*{Acknowledgements}
We owe a debt of gratitude to Hajime Suzuki for releasing his masterpiece and
insightful notes before formal publication. We thank M. Schatz, P. Rescheneder
and F. Sedlazeck for pointing out the limitation of BWA-MEM. We are also
grateful to early minimap2 testers who have greatly helped to fix various
issues.
\bibliography{minimap2}
\pagebreak
\appendix
\begin{methods}
\section*{Appendix}
A 2-piece gap cost function is
\[
\gamma(l)=\min\{q+l\cdot e,\tilde{q}+l\cdot\tilde{e}\}
\]
Without losing generality, we assume $q+e\le\tilde{q}+\tilde{e}$. The equation
to compute the optimal alignment under such a gap cost is~\citep{Gotoh:1990aa}
\begin{equation}\label{eq:ae86}
\left\{\begin{array}{l}
H_{ij} = \max\{H_{i-1,j-1}+s(i,j),E_{ij},F_{ij},\tilde{E}_{ij},\tilde{F}_{ij}\}\\
E_{i+1,j}= \max\{H_{ij}-q,E_{ij}\}-e\\
F_{i,j+1}= \max\{H_{ij}-q,F_{ij}\}-e\\
\tilde{E}_{i+1,j}= \max\{H_{ij}-\tilde{q},\tilde{E}_{ij}\}-\tilde{e}\\
\tilde{F}_{i,j+1}= \max\{H_{ij}-\tilde{q},\tilde{F}_{ij}\}-\tilde{e}
\end{array}\right.
\end{equation}
where $s(i,j)$ is the score between the $i$-th reference base and $j$-th query
base. If we define
\[
\left\{\begin{array}{ll}
u_{ij}\triangleq H_{ij}-H_{i-1,j} & v_{ij}\triangleq H_{ij}-H_{i,j-1} \\
x_{ij}\triangleq E_{i+1,j}-H_{ij} & \tilde{x}_{ij}\triangleq \tilde{E}_{i+1,j}-\tilde{H}_{ij} \\
y_{ij}\triangleq F_{i,j+1}-H_{ij} & \tilde{y}_{ij}\triangleq \tilde{F}_{i,j+1}-\tilde{H}_{ij}
\end{array}\right.
\]
we can transform Eq.~(\ref{eq:ae86}) to
\begin{equation}\label{eq:suzuki}
\left\{\begin{array}{lll}
z_{ij}&=&\max\{s(i,j),x_{i-1,j}+v_{i-1,j},y_{i,j-1}+u_{i,j-1},\\
&&\tilde{x}_{i-1,j}+v_{i-1,j},\tilde{y}_{i,j-1}+u_{i,j-1}\}\\
u_{ij}&=&z_{ij}-v_{i-1,j}\\
v_{ij}&=&z_{ij}-u_{i,j-1}\\
x_{ij}&=&\max\{0,x_{i-1,j}+v_{i-1,j}-z_{ij}+q\}-q-e\\
y_{ij}&=&\max\{0,y_{i,j-1}+u_{i,j-1}-z_{ij}+q\}-q-e\\
\tilde{x}_{ij}&=&\max\{0,\tilde{x}_{i-1,j}+v_{i-1,j}-z_{ij}+\tilde{q}\}-\tilde{q}-\tilde{e}\\
\tilde{y}_{ij}&=&\max\{0,\tilde{y}_{i,j-1}+u_{i,j-1}-z_{ij}+\tilde{q}\}-\tilde{q}-\tilde{e}
\end{array}\right.
\end{equation}
where $z_{ij}$ is a temporary variable that does not need to be stored.
All values in Eq.~(\ref{eq:suzuki}) are bounded. To see that,
\[
x_{ij}=E_{i+1,j}-H_{ij}=\max\{-q,E_{ij}-H_{ij}\}-e
\]
With $E_{ij}\le H_{ij}$, we have
\[
-q-e\le x_{ij}\le\max\{-q,0\}-e=-e
\]
and similar inequations for $y_{ij}$, $\tilde{x}_{ij}$ and $\tilde{y}_{ij}$.
In addition,
\[
u_{ij}=z_{ij}-v_{i-1,j}\ge\max\{x_{i-1,j},\tilde{x}_{i-1,j}\}\ge-q-e
\]
As the maximum value of $z_{ij}=H_{ij}-H_{i-1,j-1}$ is $M$, the maximal
matching score, we can derive
\[
u_{ij}\le M-v_{i-1,j}\le M+q+e
\]
In conclusion, $x$ and $y$ are bounded by $[-q-e,-e]$, $\tilde{x}$ and $\tilde{y}$ by
$[-\tilde{q}-\tilde{e},-\tilde{e}]$, and $u$ and $v$ by $[-q-e,M+q+e]$. When
matching score and gap cost are small, each of them can be stored as a 8-bit
integer. This enables 16-way SSE vectorization regardless of the peak score of
the alignment.
For a more efficient SSE implementation, we transform the row-column coordinate
to diagonal-anti-diagonal coordinate by letting $r\gets i+j$ and $t\gets i$.
Eq.~(\ref{eq:suzuki}) becomes:
\begin{equation*}
\left\{\begin{array}{lll}
z_{rt}&=&\max\{s(t,r-t),x_{r-1,t-1}+v_{r-1,t-1},y_{r-1,t}+u_{r-1,t},\\
&&\tilde{x}_{r-1,t-1}+v_{r-1,t-1},\tilde{y}_{r-1,t}+u_{r-1,t}\}\\
u_{rt}&=&z_{rt}-v_{r-1,t-1}\\
v_{rt}&=&z_{rt}-u_{r-1,t}\\
x_{rt}&=&\max\{0,x_{r-1,t-1}+v_{r-1,t-1}-z_{rt}+q\}-q-e\\
y_{rt}&=&\max\{0,y_{r-1,t}+u_{r-1,t}-z_{rt}+q\}-q-e\\
\tilde{x}_{rt}&=&\max\{0,\tilde{x}_{r-1,t-1}+v_{r-1,t-1}-z_{rt}+\tilde{q}\}-\tilde{q}-\tilde{e}\\
\tilde{y}_{rt}&=&\max\{0,\tilde{y}_{r-1,t}+u_{r-1,t}-z_{rt}+\tilde{q}\}-\tilde{q}-\tilde{e}
\end{array}\right.
\end{equation*}
In this formulation, cells with the same diagonal index $r$ are independent of
each other. This allows us to fully vectorize the computation of all cells on
the same anti-diagonal in one inner loop.
On the condition that $q+e<\tilde{q}+\tilde{e}$ and $e>\tilde{e}$, the boundary
condition of this equation in the diagonal-anti-diagonal coordinate is
\[
\left\{\begin{array}{l}
x_{r-1,-1}=y_{r-1,r}=-q-e\\
\tilde{x}_{r-1,-1}=\tilde{y}_{r-1,r}=-\tilde{q}-\tilde{e}\\
u_{r-1,r}=v_{r-1,-1}=\eta(r)\\
\end{array}\right.
\]
where
\[
\eta(r)=\left\{\begin{array}{ll}
-q-e & (r=0) \\
-e & (r<\lceil\frac{\tilde{q}-q}{e-\tilde{e}}-1\rceil) \\
r\cdot(e-\tilde{e})-(\tilde{q}-q)-\tilde{e} & (r=\lceil\frac{\tilde{q}-q}{e-\tilde{e}}-1\rceil) \\
-\tilde{e} & (r>\lceil\frac{\tilde{q}-q}{e-\tilde{e}}-1\rceil)
\end{array}\right.
\]
\citet{Suzuki:2016} first derived a similar set of equations under affine gap
cost but with different notations.
\end{methods}
\end{document}

View File

@ -0,0 +1,30 @@
Q 60 32066 0 0.000000000
Q 40 32 1 0.000031155
Q 38 19 1 0.000062272
Q 36 11 1 0.000093376
Q 35 32 1 0.000124378
Q 33 15 1 0.000155400
Q 32 58 1 0.000186145
Q 27 11 1 0.000217095
Q 26 80 1 0.000247494
Q 21 19 2 0.000309186
Q 20 16 1 0.000339936
Q 19 19 1 0.000370622
Q 18 22 2 0.000432099
Q 17 37 5 0.000585751
Q 15 24 2 0.000646930
Q 14 18 3 0.000738939
Q 13 30 6 0.000922821
Q 12 18 1 0.000953054
Q 11 29 2 0.001013638
Q 10 30 1 0.001043393
Q 9 20 5 0.001196099
Q 8 25 8 0.001440348
Q 7 28 6 0.001622830
Q 6 35 12 0.001988132
Q 5 34 12 0.002352725
Q 4 29 8 0.002594865
Q 3 36 14 0.003018937
Q 2 46 15 0.003471482
Q 1 69 36 0.004558162
Q 0 167 94 0.007377173

17
tex/mm2.eval 100644
View File

@ -0,0 +1,17 @@
Q 60 32072 0 0.000000000
Q 43 206 1 0.000030981
Q 27 201 1 0.000061578
Q 15 59 1 0.000092200
Q 12 25 1 0.000122839
Q 11 16 1 0.000153473
Q 10 24 1 0.000184032
Q 9 17 2 0.000245248
Q 8 27 3 0.000336938
Q 7 23 1 0.000367309
Q 6 20 1 0.000397675
Q 5 18 4 0.000519751
Q 4 17 1 0.000550038
Q 3 29 5 0.000702204
Q 2 32 4 0.000823522
Q 1 54 6 0.001004872
Q 0 234 106 0.004202697

1288
tex/natbib.bst 100644

File diff suppressed because it is too large Load Diff

803
tex/natbib.sty 100644
View File

@ -0,0 +1,803 @@
%%
%% This is file `natbib.sty',
%% generated with the docstrip utility.
%%
%% The original source files were:
%%
%% natbib.dtx (with options: `package,all')
%% =============================================
%% IMPORTANT NOTICE:
%%
%% This program can be redistributed and/or modified under the terms
%% of the LaTeX Project Public License Distributed from CTAN
%% archives in directory macros/latex/base/lppl.txt; either
%% version 1 of the License, or any later version.
%%
%% This is a generated file.
%% It may not be distributed without the original source file natbib.dtx.
%%
%% Full documentation can be obtained by LaTeXing that original file.
%% Only a few abbreviated comments remain here to describe the usage.
%% =============================================
%% Copyright 1993-2000 Patrick W Daly
%% Max-Planck-Institut f\"ur Aeronomie
%% Max-Planck-Str. 2
%% D-37191 Katlenburg-Lindau
%% Germany
%% E-mail: daly@linmpi.mpg.de
\NeedsTeXFormat{LaTeX2e}[1995/06/01]
\ProvidesPackage{natbib}
[2000/07/24 7.0a (PWD)]
% This package reimplements the LaTeX \cite command to be used for various
% citation styles, both author-year and numerical. It accepts BibTeX
% output intended for many other packages, and therefore acts as a
% general, all-purpose citation-style interface.
%
% With standard numerical .bst files, only numerical citations are
% possible. With an author-year .bst file, both numerical and
% author-year citations are possible.
%
% If author-year citations are selected, \bibitem must have one of the
% following forms:
% \bibitem[Jones et al.(1990)]{key}...
% \bibitem[Jones et al.(1990)Jones, Baker, and Williams]{key}...
% \bibitem[Jones et al., 1990]{key}...
% \bibitem[\protect\citeauthoryear{Jones, Baker, and Williams}{Jones
% et al.}{1990}]{key}...
% \bibitem[\protect\citeauthoryear{Jones et al.}{1990}]{key}...
% \bibitem[\protect\astroncite{Jones et al.}{1990}]{key}...
% \bibitem[\protect\citename{Jones et al., }1990]{key}...
% \harvarditem[Jones et al.]{Jones, Baker, and Williams}{1990}{key}...
%
% This is either to be made up manually, or to be generated by an
% appropriate .bst file with BibTeX.
% Author-year mode || Numerical mode
% Then, \citet{key} ==>> Jones et al. (1990) || Jones et al. [21]
% \citep{key} ==>> (Jones et al., 1990) || [21]
% Multiple citations as normal:
% \citep{key1,key2} ==>> (Jones et al., 1990; Smith, 1989) || [21,24]
% or (Jones et al., 1990, 1991) || [21,24]
% or (Jones et al., 1990a,b) || [21,24]
% \cite{key} is the equivalent of \citet{key} in author-year mode
% and of \citep{key} in numerical mode
% Full author lists may be forced with \citet* or \citep*, e.g.
% \citep*{key} ==>> (Jones, Baker, and Williams, 1990)
% Optional notes as:
% \citep[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2)
% \citep[e.g.,][]{key} ==>> (e.g., Jones et al., 1990)
% \citep[see][pg. 34]{key}==>> (see Jones et al., 1990, pg. 34)
% (Note: in standard LaTeX, only one note is allowed, after the ref.
% Here, one note is like the standard, two make pre- and post-notes.)
% \citealt{key} ==>> Jones et al. 1990
% \citealt*{key} ==>> Jones, Baker, and Williams 1990
% \citealp{key} ==>> Jones et al., 1990
% \citealp*{key} ==>> Jones, Baker, and Williams, 1990
% Additional citation possibilities (both author-year and numerical modes)
% \citeauthor{key} ==>> Jones et al.
% \citeauthor*{key} ==>> Jones, Baker, and Williams
% \citeyear{key} ==>> 1990
% \citeyearpar{key} ==>> (1990)
% \citetext{priv. comm.} ==>> (priv. comm.)
% Note: full author lists depends on whether the bib style supports them;
% if not, the abbreviated list is printed even when full requested.
%
% For names like della Robbia at the start of a sentence, use
% \Citet{dRob98} ==>> Della Robbia (1998)
% \Citep{dRob98} ==>> (Della Robbia, 1998)
% \Citeauthor{dRob98} ==>> Della Robbia
%
%
% Citation aliasing is achieved with
% \defcitealias{key}{text}
% \citetalias{key} ==>> text
% \citepalias{key} ==>> (text)
%
% Defining the citation style of a given bib style:
% Use \bibpunct (in the preamble only) with 6 mandatory arguments:
% 1. opening bracket for citation
% 2. closing bracket
% 3. citation separator (for multiple citations in one \cite)
% 4. the letter n for numerical styles, s for superscripts
% else anything for author-year
% 5. punctuation between authors and date
% 6. punctuation between years (or numbers) when common authors missing
% One optional argument is the character coming before post-notes. It
% appears in square braces before all other arguments. May be left off.
% Example (and default) \bibpunct[, ]{(}{)}{;}{a}{,}{,}
%
% To make this automatic for a given bib style, named newbib, say, make
% a local configuration file, natbib.cfg, with the definition
% \newcommand{\bibstyle@newbib}{\bibpunct...}
% Then the \bibliographystyle{newbib} will cause \bibstyle@newbib to
% be called on THE NEXT LATEX RUN (via the aux file).
%
% Such preprogrammed definitions may be invoked in the text (preamble only)
% by calling \citestyle{newbib}. This is only useful if the style specified
% differs from that in \bibliographystyle.
%
% With \citeindextrue and \citeindexfalse, one can control whether the
% \cite commands make an automatic entry of the citation in the .idx
% indexing file. For this, \makeindex must also be given in the preamble.
%
% LaTeX2e Options: (for selecting punctuation)
% round - round parentheses are used (default)
% square - square brackets are used [option]
% curly - curly braces are used {option}
% angle - angle brackets are used <option>
% colon - multiple citations separated by colon (default)
% comma - separated by comma
% authoryear - selects author-year citations (default)
% numbers- selects numerical citations
% super - numerical citations as superscripts
% sort - sorts multiple citations according to order in ref. list
% sort&compress - like sort, but also compresses numerical citations
% longnamesfirst - makes first citation full author list
% sectionbib - puts bibliography in a \section* instead of \chapter*
% Punctuation so selected dominates over any predefined ones.
% LaTeX2e options are called as, e.g.
% \usepackage[square,comma]{natbib}
% LaTeX the source file natbib.dtx to obtain more details
% or the file natnotes.tex for a brief reference sheet.
%-----------------------------------------------------------
\@ifclassloaded{aguplus}{\PackageError{natbib}
{The aguplus class already includes natbib coding,\MessageBreak
so you should not add it explicitly}
{Type <Return> for now, but then later remove\MessageBreak
the command \protect\usepackage{natbib} from the document}
\endinput}{}
\@ifclassloaded{nlinproc}{\PackageError{natbib}
{The nlinproc class already includes natbib coding,\MessageBreak
so you should not add it explicitly}
{Type <Return> for now, but then later remove\MessageBreak
the command \protect\usepackage{natbib} from the document}
\endinput}{}
\@ifclassloaded{egs}{\PackageError{natbib}
{The egs class already includes natbib coding,\MessageBreak
so you should not add it explicitly}
{Type <Return> for now, but then later remove\MessageBreak
the command \protect\usepackage{natbib} from the document}
\endinput}{}
% Define citation punctuation for some author-year styles
% One may add and delete at this point
% Or put additions into local configuration file natbib.cfg
\newcommand\bibstyle@chicago{\bibpunct{(}{)}{;}{a}{,}{,}}
\newcommand\bibstyle@named{\bibpunct{[}{]}{;}{a}{,}{,}}
\newcommand\bibstyle@agu{\bibpunct{[}{]}{;}{a}{,}{,~}}%Amer. Geophys. Union
\newcommand\bibstyle@egs{\bibpunct{(}{)}{;}{a}{,}{,}}%Eur. Geophys. Soc.
\newcommand\bibstyle@agsm{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
\newcommand\bibstyle@kluwer{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
\newcommand\bibstyle@dcu{\bibpunct{(}{)}{;}{a}{;}{,}\gdef\harvardand{and}}
\newcommand\bibstyle@aa{\bibpunct{(}{)}{;}{a}{}{,}} %Astronomy & Astrophysics
\newcommand\bibstyle@pass{\bibpunct{(}{)}{;}{a}{,}{,}}%Planet. & Space Sci
\newcommand\bibstyle@anngeo{\bibpunct{(}{)}{;}{a}{,}{,}}%Annales Geophysicae
\newcommand\bibstyle@nlinproc{\bibpunct{(}{)}{;}{a}{,}{,}}%Nonlin.Proc.Geophys.
% Define citation punctuation for some numerical styles
\newcommand\bibstyle@cospar{\bibpunct{/}{/}{,}{n}{}{}%
\gdef\NAT@biblabelnum##1{##1.}}
\newcommand\bibstyle@esa{\bibpunct{(Ref.~}{)}{,}{n}{}{}%
\gdef\NAT@biblabelnum##1{##1.\hspace{1em}}}
\newcommand\bibstyle@nature{\bibpunct{}{}{,}{s}{}{\textsuperscript{,}}%
\gdef\NAT@biblabelnum##1{##1.}}
% The standard LaTeX styles
\newcommand\bibstyle@plain{\bibpunct{[}{]}{,}{n}{}{,}}
\let\bibstyle@alpha=\bibstyle@plain
\let\bibstyle@abbrv=\bibstyle@plain
\let\bibstyle@unsrt=\bibstyle@plain
% The author-year modifications of the standard styles
\newcommand\bibstyle@plainnat{\bibpunct{[}{]}{,}{a}{,}{,}}
\let\bibstyle@abbrvnat=\bibstyle@plainnat
\let\bibstyle@unsrtnat=\bibstyle@plainnat
\newif\ifNAT@numbers \NAT@numbersfalse
\newif\ifNAT@super \NAT@superfalse
\DeclareOption{numbers}{\NAT@numberstrue
\ExecuteOptions{square,comma,nobibstyle}}
\DeclareOption{super}{\NAT@supertrue\NAT@numberstrue
\renewcommand\NAT@open{}\renewcommand\NAT@close{}
\ExecuteOptions{nobibstyle}}
\DeclareOption{authoryear}{\NAT@numbersfalse
\ExecuteOptions{round,colon,bibstyle}}
\DeclareOption{round}{%
\renewcommand\NAT@open{(} \renewcommand\NAT@close{)}
\ExecuteOptions{nobibstyle}}
\DeclareOption{square}{%
\renewcommand\NAT@open{[} \renewcommand\NAT@close{]}
\ExecuteOptions{nobibstyle}}
\DeclareOption{angle}{%
\renewcommand\NAT@open{$<$} \renewcommand\NAT@close{$>$}
\ExecuteOptions{nobibstyle}}
\DeclareOption{curly}{%
\renewcommand\NAT@open{\{} \renewcommand\NAT@close{\}}
\ExecuteOptions{nobibstyle}}
\DeclareOption{comma}{\renewcommand\NAT@sep{,}
\ExecuteOptions{nobibstyle}}
\DeclareOption{colon}{\renewcommand\NAT@sep{;}
\ExecuteOptions{nobibstyle}}
\DeclareOption{nobibstyle}{\let\bibstyle=\@gobble}
\DeclareOption{bibstyle}{\let\bibstyle=\@citestyle}
\newif\ifNAT@openbib \NAT@openbibfalse
\DeclareOption{openbib}{\NAT@openbibtrue}
\DeclareOption{sectionbib}{\def\NAT@sectionbib{on}}
\def\NAT@sort{0}
\DeclareOption{sort}{\def\NAT@sort{1}}
\DeclareOption{sort&compress}{\def\NAT@sort{2}}
\@ifpackageloaded{cite}{\PackageWarningNoLine{natbib}
{The `cite' package should not be used\MessageBreak
with natbib. Use option `sort' instead}\ExecuteOptions{sort}}{}
\newif\ifNAT@longnames\NAT@longnamesfalse
\DeclareOption{longnamesfirst}{\NAT@longnamestrue}
\DeclareOption{nonamebreak}{\def\NAT@nmfmt#1{\mbox{\NAT@up#1}}}
\def\NAT@nmfmt#1{{\NAT@up#1}}
\renewcommand\bibstyle[1]{\@ifundefined{bibstyle@#1}{\relax}
{\csname bibstyle@#1\endcsname}}
\AtBeginDocument{\global\let\bibstyle=\@gobble}
\let\@citestyle\bibstyle
\newcommand\citestyle[1]{\@citestyle{#1}\let\bibstyle\@gobble}
\@onlypreamble{\citestyle}\@onlypreamble{\@citestyle}
\newcommand\bibpunct[7][, ]%
{\gdef\NAT@open{#2}\gdef\NAT@close{#3}\gdef
\NAT@sep{#4}\global\NAT@numbersfalse\ifx #5n\global\NAT@numberstrue
\else
\ifx #5s\global\NAT@numberstrue\global\NAT@supertrue
\fi\fi
\gdef\NAT@aysep{#6}\gdef\NAT@yrsep{#7}%
\gdef\NAT@cmt{#1}%
\global\let\bibstyle\@gobble
}
\@onlypreamble{\bibpunct}
\newcommand\NAT@open{(} \newcommand\NAT@close{)}
\newcommand\NAT@sep{;}
\ProcessOptions
\newcommand\NAT@aysep{,} \newcommand\NAT@yrsep{,}
\newcommand\NAT@cmt{, }
\newcommand\NAT@cite%
[3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\ \fi
#1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
\newcommand\NAT@citenum%
[3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\ \fi
#1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
\newcommand\NAT@citesuper[3]{\ifNAT@swa
\unskip\hspace{1\p@}\textsuperscript{#1}%
\if*#3*\else\ (#3)\fi\else #1\fi\endgroup}
\providecommand
\textsuperscript[1]{\mbox{$^{\mbox{\scriptsize#1}}$}}
\providecommand\@firstofone[1]{#1}
\newcommand\NAT@citexnum{}
\def\NAT@citexnum[#1][#2]#3{%
\NAT@sort@cites{#3}%
\let\@citea\@empty
\@cite{\def\NAT@num{-1}\let\NAT@last@yr\relax\let\NAT@nm\@empty
\@for\@citeb:=\NAT@cite@list\do
{\edef\@citeb{\expandafter\@firstofone\@citeb}%
\if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
\@ifundefined{b@\@citeb\@extra@b@citeb}{%
{\reset@font\bfseries?}
\NAT@citeundefined\PackageWarning{natbib}%
{Citation `\@citeb' on page \thepage \space undefined}}%
{\let\NAT@last@num\NAT@num\let\NAT@last@nm\NAT@nm
\NAT@parse{\@citeb}%
\ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
\let\NAT@name=\NAT@all@names
\global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
\fi
\ifNAT@full\let\NAT@nm\NAT@all@names\else
\let\NAT@nm\NAT@name\fi
\ifNAT@swa
\ifnum\NAT@ctype>1\relax\@citea
\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\ifnum\NAT@ctype=2\relax\NAT@test{\NAT@ctype}%
\else\NAT@alias
\fi\hyper@natlinkend\else
\ifnum\NAT@sort>1
\begingroup\catcode`\_=8
\ifcat _\ifnum\z@<0\NAT@num _\else A\fi
\global\let\NAT@nm=\NAT@num \else \gdef\NAT@nm{-2}\fi
\ifcat _\ifnum\z@<0\NAT@last@num _\else A\fi
\global\@tempcnta=\NAT@last@num \global\advance\@tempcnta by\@ne
\else \global\@tempcnta\m@ne\fi
\endgroup
\ifnum\NAT@nm=\@tempcnta
\ifx\NAT@last@yr\relax
\edef\NAT@last@yr{\@citea \mbox{\noexpand\citenumfont{\NAT@num}}}%
\else
\edef\NAT@last@yr{--\penalty\@m\mbox{\noexpand\citenumfont{\NAT@num}}}%
\fi
\else
\NAT@last@yr \@citea \mbox{\citenumfont{\NAT@num}}%
\let\NAT@last@yr\relax
\fi
\else
\@citea \mbox{\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
{\citenumfont{\NAT@num}}\hyper@natlinkend}%
\fi
\fi
\def\@citea{\NAT@sep\penalty\@m\NAT@space}%
\else
\ifcase\NAT@ctype\relax
\ifx\NAT@last@nm\NAT@nm \NAT@yrsep\penalty\@m\NAT@space\else
\@citea \NAT@test{1}\ \NAT@@open
\if*#1*\else#1\ \fi\fi \NAT@mbox{%
\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
{\citenumfont{\NAT@num}}\hyper@natlinkend}%
\def\@citea{\NAT@@close\NAT@sep\penalty\@m\ }%
\or\@citea
\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@test{\NAT@ctype}\hyper@natlinkend
\def\@citea{\NAT@sep\penalty\@m\ }%
\or\@citea
\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@test{\NAT@ctype}\hyper@natlinkend
\def\@citea{\NAT@sep\penalty\@m\ }%
\or\@citea
\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@alias\hyper@natlinkend
\def\@citea{\NAT@sep\penalty\@m\ }%
\fi
\fi
}}%
\ifnum\NAT@sort>1\relax\NAT@last@yr\fi
\ifNAT@swa\else\ifnum\NAT@ctype=0\if*#2*\else
\NAT@cmt#2\fi \NAT@@close\fi\fi}{#1}{#2}}
\newcommand\NAT@test[1]{\ifnum#1=1 \ifx\NAT@nm\NAT@noname
{\reset@font\bfseries(author?)}\PackageWarning{natbib}
{Author undefined for citation`\@citeb'
\MessageBreak
on page \thepage}\else \NAT@nm \fi
\else \if\relax\NAT@date\relax
{\reset@font\bfseries(year?)}\PackageWarning{natbib}
{Year undefined for citation`\@citeb'
\MessageBreak
on page \thepage}\else \NAT@date \fi \fi}
\let\citenumfont=\relax
\newcommand\NAT@citex{}
\def\NAT@citex%
[#1][#2]#3{%
\NAT@sort@cites{#3}%
\let\@citea\@empty
\@cite{\let\NAT@nm\@empty\let\NAT@year\@empty
\@for\@citeb:=\NAT@cite@list\do
{\edef\@citeb{\expandafter\@firstofone\@citeb}%
\if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
\@ifundefined{b@\@citeb\@extra@b@citeb}{\@citea%
{\reset@font\bfseries ?}\NAT@citeundefined
\PackageWarning{natbib}%
{Citation `\@citeb' on page \thepage \space undefined}\def\NAT@date{}}%
{\let\NAT@last@nm=\NAT@nm\let\NAT@last@yr=\NAT@year
\NAT@parse{\@citeb}%
\ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
\let\NAT@name=\NAT@all@names
\global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
\fi
\ifNAT@full\let\NAT@nm\NAT@all@names\else
\let\NAT@nm\NAT@name\fi
\ifNAT@swa\ifcase\NAT@ctype
\if\relax\NAT@date\relax
\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@nmfmt{\NAT@nm}\NAT@date\hyper@natlinkend
\else
\ifx\NAT@last@nm\NAT@nm\NAT@yrsep
\ifx\NAT@last@yr\NAT@year
\hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@exlab
\hyper@natlinkend
\else\unskip\
\hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@date
\hyper@natlinkend
\fi
\else\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@nmfmt{\NAT@nm}%
\hyper@natlinkbreak{\NAT@aysep\ }{\@citeb\@extra@b@citeb}%
\NAT@date\hyper@natlinkend
\fi
\fi
\or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@nmfmt{\NAT@nm}\hyper@natlinkend
\or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@date\hyper@natlinkend
\or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@alias\hyper@natlinkend
\fi \def\@citea{\NAT@sep\ }%
\else\ifcase\NAT@ctype
\if\relax\NAT@date\relax
\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@nmfmt{\NAT@nm}\hyper@natlinkend
\else
\ifx\NAT@last@nm\NAT@nm\NAT@yrsep
\ifx\NAT@last@yr\NAT@year
\hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@exlab
\hyper@natlinkend
\else\unskip\
\hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@date
\hyper@natlinkend
\fi
\else\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@nmfmt{\NAT@nm}%
\hyper@natlinkbreak{\ \NAT@@open\if*#1*\else#1\ \fi}%
{\@citeb\@extra@b@citeb}%
\NAT@date\hyper@natlinkend\fi
\fi
\or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@nmfmt{\NAT@nm}\hyper@natlinkend
\or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@date\hyper@natlinkend
\or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
\NAT@alias\hyper@natlinkend
\fi \if\relax\NAT@date\relax\def\@citea{\NAT@sep\ }%
\else\def\@citea{\NAT@@close\NAT@sep\ }\fi
\fi
}}\ifNAT@swa\else\if*#2*\else\NAT@cmt#2\fi
\if\relax\NAT@date\relax\else\NAT@@close\fi\fi}{#1}{#2}}
\newif\ifNAT@par \NAT@partrue
\newcommand\NAT@@open{\ifNAT@par\NAT@open\fi}
\newcommand\NAT@@close{\ifNAT@par\NAT@close\fi}
\newcommand\NAT@alias{\@ifundefined{al@\@citeb\@extra@b@citeb}{%
{\reset@font\bfseries(alias?)}\PackageWarning{natbib}
{Alias undefined for citation `\@citeb'
\MessageBreak on page \thepage}}{\@nameuse{al@\@citeb\@extra@b@citeb}}}
\let\NAT@up\relax
\newcommand\NAT@Up[1]{{\let\protect\@unexpandable@protect\let~\relax
\expandafter\NAT@deftemp#1}\expandafter\NAT@UP\NAT@temp}
\newcommand\NAT@deftemp[1]{\xdef\NAT@temp{#1}}
\newcommand\NAT@UP[1]{\let\@tempa\NAT@UP\ifcat a#1\MakeUppercase{#1}%
\let\@tempa\relax\else#1\fi\@tempa}
\newcommand\shortcites[1]{%
\@bsphack\@for\@citeb:=#1\do
{\edef\@citeb{\expandafter\@firstofone\@citeb}%
\global\@namedef{bv@\@citeb\@extra@b@citeb}{}}\@esphack}
\newcommand\NAT@biblabel[1]{\hfill}
\newcommand\NAT@biblabelnum[1]{\bibnumfmt{#1}}
\newcommand\bibnumfmt[1]{[#1]}
\def\@tempa#1{[#1]}
\ifx\@tempa\@biblabel\let\@biblabel\@empty\fi
\newcommand\NAT@bibsetnum[1]{\settowidth\labelwidth{\@biblabel{#1}}%
\setlength{\leftmargin}{\labelwidth}\addtolength{\leftmargin}{\labelsep}%
\setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}%
\ifNAT@openbib
\addtolength{\leftmargin}{4mm}%
\setlength{\itemindent}{-4mm}%
\setlength{\listparindent}{\itemindent}%
\setlength{\parsep}{0pt}%
\fi
}
\newlength{\bibhang}
\setlength{\bibhang}{1em}
\newlength{\bibsep}
{\@listi \global\bibsep\itemsep \global\advance\bibsep by\parsep}
\newcommand\NAT@bibsetup%
[1]{\setlength{\leftmargin}{\bibhang}\setlength{\itemindent}{-\leftmargin}%
\setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}}
\newcommand\NAT@set@cites{\ifNAT@numbers
\ifNAT@super \let\@cite\NAT@citesuper
\def\NAT@mbox##1{\unskip\nobreak\hspace{1\p@}\textsuperscript{##1}}%
\let\citeyearpar=\citeyear
\let\NAT@space\relax\else
\let\NAT@mbox=\mbox
\let\@cite\NAT@citenum \def\NAT@space{ }\fi
\let\@citex\NAT@citexnum
\ifx\@biblabel\@empty\let\@biblabel\NAT@biblabelnum\fi
\let\@bibsetup\NAT@bibsetnum
\def\natexlab##1{}%
\else
\let\@cite\NAT@cite
\let\@citex\NAT@citex
\let\@biblabel\NAT@biblabel
\let\@bibsetup\NAT@bibsetup
\def\natexlab##1{##1}%
\fi}
\AtBeginDocument{\NAT@set@cites}
\AtBeginDocument{\ifx\SK@def\@undefined\else
\ifx\SK@cite\@empty\else
\SK@def\@citex[#1][#2]#3{\SK@\SK@@ref{#3}\SK@@citex[#1][#2]{#3}}\fi
\ifx\SK@citeauthor\@undefined\def\HAR@checkdef{}\else
\let\citeauthor\SK@citeauthor
\let\citefullauthor\SK@citefullauthor
\let\citeyear\SK@citeyear\fi
\fi}
\AtBeginDocument{\@ifpackageloaded{hyperref}{%
\ifnum\NAT@sort=2\def\NAT@sort{1}\fi}{}}
\newif\ifNAT@full\NAT@fullfalse
\newif\ifNAT@swa
\DeclareRobustCommand\citet
{\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@partrue
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\newcommand\NAT@citetp{\@ifnextchar[{\NAT@@citetp}{\NAT@@citetp[]}}
\newcommand\NAT@@citetp{}
\def\NAT@@citetp[#1]{\@ifnextchar[{\@citex[#1]}{\@citex[][#1]}}
\DeclareRobustCommand\citep
{\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@partrue
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\cite
{\begingroup\def\NAT@ctype{0}\NAT@partrue\NAT@swatrue
\@ifstar{\NAT@fulltrue\NAT@cites}{\NAT@fullfalse\NAT@cites}}
\newcommand\NAT@cites{\@ifnextchar [{\NAT@@citetp}{%
\ifNAT@numbers\else
\NAT@swafalse
\fi
\NAT@@citetp[]}}
\DeclareRobustCommand\citealt
{\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@parfalse
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\citealp
{\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@parfalse
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\citeauthor
{\begingroup\NAT@swafalse\def\NAT@ctype{1}\NAT@parfalse
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\Citet
{\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@partrue
\let\NAT@up\NAT@Up
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\Citep
{\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@partrue
\let\NAT@up\NAT@Up
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\Citealt
{\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@parfalse
\let\NAT@up\NAT@Up
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\Citealp
{\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@parfalse
\let\NAT@up\NAT@Up
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\Citeauthor
{\begingroup\NAT@swafalse\def\NAT@ctype{1}\NAT@parfalse
\let\NAT@up\NAT@Up
\@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
\DeclareRobustCommand\citeyear
{\begingroup\NAT@swafalse\def\NAT@ctype{2}\NAT@parfalse\NAT@citetp}
\DeclareRobustCommand\citeyearpar
{\begingroup\NAT@swatrue\def\NAT@ctype{2}\NAT@partrue\NAT@citetp}
\newcommand\citetext[1]{\NAT@open#1\NAT@close}
\DeclareRobustCommand\citefullauthor
{\citeauthor*}
\newcommand\defcitealias[2]{%
\@ifundefined{al@#1\@extra@b@citeb}{}
{\PackageWarning{natbib}{Overwriting existing alias for citation #1}}
\@namedef{al@#1\@extra@b@citeb}{#2}}
\DeclareRobustCommand\citetalias{\begingroup
\NAT@swafalse\def\NAT@ctype{3}\NAT@parfalse\NAT@citetp}
\DeclareRobustCommand\citepalias{\begingroup
\NAT@swatrue\def\NAT@ctype{3}\NAT@partrue\NAT@citetp}
\renewcommand\nocite[1]{\@bsphack
\@for\@citeb:=#1\do{%
\edef\@citeb{\expandafter\@firstofone\@citeb}%
\if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
\if*\@citeb\else
\@ifundefined{b@\@citeb\@extra@b@citeb}{%
\NAT@citeundefined \PackageWarning{natbib}%
{Citation `\@citeb' undefined}}{}\fi}%
\@esphack}
\newcommand\NAT@parse[1]{{%
\let\protect=\@unexpandable@protect\let~\relax
\let\active@prefix=\@gobble
\xdef\NAT@temp{\csname b@#1\@extra@b@citeb\endcsname}}%
\expandafter\NAT@split\NAT@temp
\expandafter\NAT@parse@date\NAT@date??????@@%
\ifciteindex\NAT@index\fi
}
\newcommand\NAT@split[4]{%
\gdef\NAT@num{#1}\gdef\NAT@name{#3}\gdef\NAT@date{#2}%
\gdef\NAT@all@names{#4}%
\ifx\NAT@noname\NAT@all@names \gdef\NAT@all@names{#3}\fi}
\newcommand\NAT@parse@date{}
\def\NAT@parse@date#1#2#3#4#5#6@@{%
\ifnum\the\catcode`#1=11\def\NAT@year{}\def\NAT@exlab{#1}\else
\ifnum\the\catcode`#2=11\def\NAT@year{#1}\def\NAT@exlab{#2}\else
\ifnum\the\catcode`#3=11\def\NAT@year{#1#2}\def\NAT@exlab{#3}\else
\ifnum\the\catcode`#4=11\def\NAT@year{#1#2#3}\def\NAT@exlab{#4}\else
\def\NAT@year{#1#2#3#4}\def\NAT@exlab{{#5}}\fi\fi\fi\fi}
\newcommand\NAT@index{}
\let\NAT@makeindex=\makeindex
\renewcommand\makeindex{\NAT@makeindex
\renewcommand\NAT@index{\@bsphack\begingroup
\def~{\string~}\@wrindex{\NAT@idxtxt}}}
\newcommand\NAT@idxtxt{\NAT@name\ \NAT@open\NAT@date\NAT@close}
\@ifundefined{@indexfile}{}{\let\NAT@makeindex\relax\makeindex}
\newif\ifciteindex \citeindexfalse
\newcommand\citeindextype{default}
\newcommand\NAT@index@alt{{\let\protect=\noexpand\let~\relax
\xdef\NAT@temp{\NAT@idxtxt}}\expandafter\NAT@exp\NAT@temp\@nil}
\newcommand\NAT@exp{}
\def\NAT@exp#1\@nil{\mbox{}\index[\citeindextype]{#1}}
\AtBeginDocument{%
\@ifpackageloaded{index}{\let\NAT@index=\NAT@index@alt}{}}
\newcommand\NAT@ifcmd{\futurelet\NAT@temp\NAT@ifxcmd}
\newcommand\NAT@ifxcmd{\ifx\NAT@temp\relax\else\expandafter\NAT@bare\fi}
\def\NAT@bare#1(#2)#3(@)#4\@nil#5{%
\if @#2
\expandafter\NAT@apalk#1, , \@nil{#5}\else
\stepcounter{NAT@ctr}%
\NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{#3}{#5}
\fi
}
\newcommand\NAT@wrout[5]{%
\if@filesw
{\let\protect\noexpand\let~\relax
\immediate
\write\@auxout{\string\bibcite{#5}{{#1}{#2}{{#3}}{{#4}}}}}\fi
\ignorespaces}
\def\NAT@noname{{}}
\renewcommand\bibitem{%
\@ifnextchar[{\@lbibitem}{%
\global\NAT@stdbsttrue
\stepcounter{NAT@ctr}\@lbibitem[\arabic{NAT@ctr}]}}
\def\@lbibitem[#1]#2{%
\if\relax\@extra@b@citeb\relax\else
\@ifundefined{br@#2\@extra@b@citeb}{}{%
\@namedef{br@#2}{\@nameuse{br@#2\@extra@b@citeb}}}\fi
\@ifundefined{b@#2\@extra@b@citeb}{\def\NAT@num{}}{\NAT@parse{#2}}%
\item[\hfil\hyper@natanchorstart{#2\@extra@b@citeb}\@biblabel{\NAT@num}%
\hyper@natanchorend]%
\NAT@ifcmd#1(@)(@)\@nil{#2}}
\ifx\SK@lbibitem\@undefined\else
\let\SK@lbibitem\@lbibitem
\def\@lbibitem[#1]#2{%
\SK@lbibitem[#1]{#2}\SK@\SK@@label{#2}\ignorespaces}\fi
\newif\ifNAT@stdbst \NAT@stdbstfalse
\AtEndDocument
{\ifNAT@stdbst\if@filesw\immediate\write\@auxout{\string
\global\string\NAT@numberstrue}\fi\fi
}
\providecommand\bibcite{}
\renewcommand\bibcite[2]{\@ifundefined{b@#1\@extra@binfo}\relax
{\NAT@citemultiple
\PackageWarningNoLine{natbib}{Citation `#1' multiply defined}}%
\global\@namedef{b@#1\@extra@binfo}{#2}}
\AtEndDocument{\NAT@swatrue\let\bibcite\NAT@testdef}
\newcommand\NAT@testdef[2]{%
\def\NAT@temp{#2}\expandafter \ifx \csname b@#1\@extra@binfo\endcsname
\NAT@temp \else \ifNAT@swa \NAT@swafalse
\PackageWarningNoLine{natbib}{Citation(s) may have
changed.\MessageBreak
Rerun to get citations correct}\fi\fi}
\newcommand\NAT@apalk{}
\def\NAT@apalk#1, #2, #3\@nil#4{\if\relax#2\relax
\global\NAT@stdbsttrue
\NAT@wrout{#1}{}{}{}{#4}\else
\stepcounter{NAT@ctr}%
\NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{}{#4}\fi}
\newcommand\citeauthoryear{}
\def\citeauthoryear#1#2#3(@)(@)\@nil#4{\stepcounter{NAT@ctr}\if\relax#3\relax
\NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{}{#4}\else
\NAT@wrout{\arabic {NAT@ctr}}{#3}{#2}{#1}{#4}\fi}
\newcommand\citestarts{\NAT@open}
\newcommand\citeends{\NAT@close}
\newcommand\betweenauthors{and}
\newcommand\astroncite{}
\def\astroncite#1#2(@)(@)\@nil#3{\stepcounter{NAT@ctr}\NAT@wrout{\arabic
{NAT@ctr}}{#2}{#1}{}{#3}}
\newcommand\citename{}
\def\citename#1#2(@)(@)\@nil#3{\expandafter\NAT@apalk#1#2, \@nil{#3}}
\newcommand\harvarditem[4][]%
{\if\relax#1\relax\bibitem[#2(#3)]{#4}\else
\bibitem[#1(#3)#2]{#4}\fi }
\newcommand\harvardleft{\NAT@open}
\newcommand\harvardright{\NAT@close}
\newcommand\harvardyearleft{\NAT@open}
\newcommand\harvardyearright{\NAT@close}
\AtBeginDocument{\providecommand{\harvardand}{and}}
\newcommand\harvardurl[1]{\textbf{URL:} \textit{#1}}
\providecommand\bibsection{}
\@ifundefined{chapter}%
{\renewcommand\bibsection{\section*{\refname
\@mkboth{\MakeUppercase{\refname}}{\MakeUppercase{\refname}}}}}
{\@ifundefined{NAT@sectionbib}%
{\renewcommand\bibsection{\chapter*{\bibname
\@mkboth{\MakeUppercase{\bibname}}{\MakeUppercase{\bibname}}}}}
{\renewcommand\bibsection{\section*{\bibname
\ifx\@mkboth\@gobbletwo\else\markright{\MakeUppercase{\bibname}}\fi}}}}
\@ifclassloaded{amsart}%
{\renewcommand\bibsection{\section*{\refname}}}{}
\@ifclassloaded{amsbook}%
{\renewcommand\bibsection{\chapter*{\bibname}}}{}
\@ifundefined{bib@heading}{}{\let\bibsection\bib@heading}
\newcounter{NAT@ctr}
\renewenvironment{thebibliography}[1]{%
\bibsection
\vspace{1\p@}\parindent \z@\bibpreamble\bibfont\list
{\@biblabel{\arabic{NAT@ctr}}}{\@bibsetup{#1}%
\setcounter{NAT@ctr}{0}}%
\ifNAT@openbib
\renewcommand\newblock{\par}
\else
\renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}%
\fi
\sloppy\clubpenalty4000\widowpenalty4000
\sfcode`\.=1000\relax
\let\citeN\cite \let\shortcite\cite
\let\citeasnoun\cite\fontsize{7}{9}\selectfont
}{\def\@noitemerr{%
\PackageWarning{natbib}
{Empty `thebibliography' environment}}%
\endlist\vskip-\lastskip}
\let\bibfont\relax
\let\bibpreamble\relax
\providecommand\reset@font{\relax}
\providecommand\bibname{Bibliography}
\providecommand\refname{References}
\newcommand\NAT@citeundefined{\gdef \NAT@undefined {%
\PackageWarningNoLine{natbib}{There were undefined citations}}}
\let \NAT@undefined \relax
\newcommand\NAT@citemultiple{\gdef \NAT@multiple {%
\PackageWarningNoLine{natbib}{There were multiply defined citations}}}
\let \NAT@multiple \relax
\AtEndDocument{\NAT@undefined\NAT@multiple}
\providecommand\@mkboth[2]{}
\providecommand\MakeUppercase{\uppercase}
\providecommand{\@extra@b@citeb}{}
\gdef\@extra@binfo{}
\providecommand\hyper@natanchorstart[1]{}
\providecommand\hyper@natanchorend{}
\providecommand\hyper@natlinkstart[1]{}
\providecommand\hyper@natlinkend{}
\providecommand\hyper@natlinkbreak[2]{#1}
\@ifundefined{bbl@redefine}{}{%
\bbl@redefine\nocite#1{%
\@safe@activestrue\org@nocite{#1}\@safe@activesfalse}%
\bbl@redefine\@lbibitem[#1]#2{%
\@safe@activestrue\org@@lbibitem[#1]{#2}\@safe@activesfalse}%
}
\AtBeginDocument{\@ifundefined{bbl@redefine}{}{%
\bbl@redefine\@citex[#1][#2]#3{%
\@safe@activestrue\org@@citex[#1][#2]{#3}\@safe@activesfalse}%
\bbl@redefine\NAT@testdef#1#2{%
\@safe@activestrue\org@NAT@testdef{#1}{#2}\@safe@activesfalse}%
\@ifundefined{org@@lbibitem}{%
\bbl@redefine\@lbibitem[#1]#2{%
\@safe@activestrue\org@@lbibitem[#1]{#2}\@safe@activesfalse}}{}%
}}
\ifnum\NAT@sort>0
\newcommand\NAT@sort@cites[1]{%
\@tempcntb\m@ne
\let\@celt\delimiter
\def\NAT@num@list{}%
\def\NAT@cite@list{}%
\def\NAT@nonsort@list{}%
\@for \@citeb:=#1\do{\NAT@make@cite@list}%
\edef\NAT@cite@list{\NAT@cite@list\NAT@nonsort@list}%
\edef\NAT@cite@list{\expandafter\NAT@xcom\NAT@cite@list @@}}
\begingroup \catcode`\_=8
\gdef\NAT@make@cite@list{%
\edef\@citeb{\expandafter\@firstofone\@citeb}%
\@ifundefined{b@\@citeb\@extra@b@citeb}{\def\NAT@num{A}}%
{\NAT@parse{\@citeb}}%
\ifcat _\ifnum\z@<0\NAT@num _\else A\fi
\@tempcnta\NAT@num \relax
\ifnum \@tempcnta>\@tempcntb
\edef\NAT@num@list{\NAT@num@list \@celt{\NAT@num}}%
\edef\NAT@cite@list{\NAT@cite@list\@citeb,}%
\@tempcntb\@tempcnta
\else
\let\NAT@@cite@list=\NAT@cite@list \def\NAT@cite@list{}%
\edef\NAT@num@list{\expandafter\NAT@num@celt \NAT@num@list \@gobble @}%
{\let\@celt=\NAT@celt\NAT@num@list}%
\fi
\else
\edef\NAT@nonsort@list{\NAT@nonsort@list\@citeb,}%
\fi}
\endgroup
\def\NAT@celt#1{\ifnum #1<\@tempcnta
\xdef\NAT@cite@list{\NAT@cite@list\expandafter\NAT@nextc\NAT@@cite@list @@}%
\xdef\NAT@@cite@list{\expandafter\NAT@restc\NAT@@cite@list}%
\else
\xdef\NAT@cite@list{\NAT@cite@list\@citeb,\NAT@@cite@list}\let\@celt\@gobble%
\fi}
\def\NAT@num@celt#1#2{\ifx \@celt #1%
\ifnum #2<\@tempcnta
\@celt{#2}%
\expandafter\expandafter\expandafter\NAT@num@celt
\else
\@celt{\number\@tempcnta}\@celt{#2}%
\fi\fi}
\def\NAT@nextc#1,#2@@{#1,}
\def\NAT@restc#1,#2{#2}
\def\NAT@xcom#1,@@{#1}
\else
\newcommand\NAT@sort@cites[1]{\edef\NAT@cite@list{#1}}\fi
\InputIfFileExists{natbib.cfg}
{\typeout{Local config file natbib.cfg used}}{}
%%
%% <<<<< End of generated file <<<<<<
%%
%% End of file `natbib.sty'.

38
tex/ngmlr.eval 100644
View File

@ -0,0 +1,38 @@
Q 60 23616 0 0.000000000
Q 45 3520 1 0.000036851
Q 41 1840 1 0.000069023
Q 37 328 2 0.000136500
Q 36 276 1 0.000169033
Q 35 480 1 0.000199601
Q 33 375 2 0.000262855
Q 31 178 2 0.000326659
Q 30 153 5 0.000487551
Q 29 200 1 0.000516696
Q 27 100 3 0.000611601
Q 26 93 3 0.000706056
Q 25 75 2 0.000768393
Q 24 82 1 0.000798314
Q 23 80 6 0.000987387
Q 22 71 6 0.001175835
Q 21 76 7 0.001394921
Q 20 63 9 0.001676897
Q 19 55 4 0.001800322
Q 18 62 8 0.002048987
Q 17 55 7 0.002265718
Q 16 60 10 0.002575539
Q 15 82 9 0.002850877
Q 14 67 7 0.003063745
Q 13 62 11 0.003401042
Q 12 64 13 0.003799084
Q 11 56 5 0.003947900
Q 10 58 17 0.004468303
Q 9 70 22 0.005139796
Q 8 23 9 0.005414604
Q 7 41 17 0.005933068
Q 6 42 18 0.006480881
Q 5 33 9 0.006751757
Q 4 29 9 0.007022948
Q 3 27 15 0.007478764
Q 2 23 10 0.007781024
Q 1 9 2 0.007840364
Q 0 13 8 0.008083105

52
tex/roc.gp 100644
View File

@ -0,0 +1,52 @@
set t po eps enh co so "Helvetica,26"
set style line 1 lt 1 pt 1 lc rgb "#e41a1c" lw 2;
set style line 2 lt 1 pt 2 lc rgb "#377eb8" lw 2;
set style line 3 lt 1 pt 3 lc rgb "#4daf4a" lw 2;
set style line 4 lt 1 pt 4 lc rgb "#984ea3" lw 2;
set style line 5 lt 1 pt 6 lc rgb "#ff7f00" lw 2;
set style line 6 lt 1 pt 8 lc rgb "#f781bf" lw 2;
set out "roc-color.eps"
set pointsize 2.0
set size 1.59,1.04
set multiplot layout 1,2
set label "(a)" at graph -0.245,1.06 font "Helvetica-bold,40"
set xlab "Error rate of mapped reads"
set ylab "Fraction of mapped reads" off +1.8
set ytics 0.02
set yran [0.9:1]
set size 0.8,1
set log x
set format x "10^{%L}"
set key bot right
plot "<./eval2roc.pl blasr-mc.eval" u 2:3 t "blasr-mc" w lp ls 4, \
"<./eval2roc.pl bwa.eval" u 2:3 t "bwa-mem" w lp ls 2, \
"<./eval2roc.pl graphmap.eval" u 2:3 t "graphmap" w lp ls 3, \
"<./eval2roc.pl minialign.eval" u 2:3 t "minialign" w lp ls 1, \
"<./eval2roc.pl mm2.eval" u 2:3 t "minimap2" w lp ls 6, \
"<./eval2roc.pl ngmlr.eval" u 2:3 t "ngm-lr" w lp ls 5
unset label
set origin 0.8,0
set size 0.79,1
set label "(b)" at graph -0.245,1.06 font "Helvetica-bold,40"
unset log
unset format
unset key
set log y
set ylab "Accumulative mapping error rate" off +0
set xlab "Mapping quality"
set yran [1e-5:0.1]
set ytics 1e-5,0.1
set format y "10^{%L}"
set xran [60:0] reverse
plot "<./eval2roc.pl blasr-mc.eval" u 1:2 w lp ls 4, \
"<./eval2roc.pl bwa.eval" u 1:2 t "bwa-mem" w lp ls 2, \
"<./eval2roc.pl graphmap.eval" u 1:2 t "graphmap" w lp ls 3, \
"<./eval2roc.pl minialign.eval" u 1:2 t "minialign" w lp ls 1, \
"<./eval2roc.pl mm2.eval" u 1:2 t "minimap2" w lp ls 6, \
"<./eval2roc.pl ngmlr.eval" u 1:2 t "ngm-lr" w lp ls 5