Merge branch 'dev'
This commit is contained in:
commit
039ea20639
|
|
@ -13,6 +13,7 @@
|
||||||
*** From k8.js ***
|
*** From k8.js ***
|
||||||
******************/
|
******************/
|
||||||
|
|
||||||
|
// Parse command-line options. A BSD getopt() clone in javascript.
|
||||||
var getopt = function(args, ostr) {
|
var getopt = function(args, ostr) {
|
||||||
var oli; // option letter list index
|
var oli; // option letter list index
|
||||||
if (typeof(getopt.place) == 'undefined')
|
if (typeof(getopt.place) == 'undefined')
|
||||||
|
|
@ -51,6 +52,7 @@ var getopt = function(args, ostr) {
|
||||||
return optopt;
|
return optopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// print an object in a format similar to JSON. For debugging.
|
||||||
function obj2str(o)
|
function obj2str(o)
|
||||||
{
|
{
|
||||||
if (typeof(o) != 'object') {
|
if (typeof(o) != 'object') {
|
||||||
|
|
@ -75,6 +77,7 @@ function obj2str(o)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// reverse a string
|
||||||
Bytes.prototype.reverse = function()
|
Bytes.prototype.reverse = function()
|
||||||
{
|
{
|
||||||
for (var i = 0; i < this.length>>1; ++i) {
|
for (var i = 0; i < this.length>>1; ++i) {
|
||||||
|
|
@ -84,6 +87,7 @@ Bytes.prototype.reverse = function()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// reverse complement a DNA string
|
||||||
Bytes.prototype.revcomp = function()
|
Bytes.prototype.revcomp = function()
|
||||||
{
|
{
|
||||||
if (Bytes.rctab == null) {
|
if (Bytes.rctab == null) {
|
||||||
|
|
@ -103,13 +107,8 @@ Bytes.prototype.revcomp = function()
|
||||||
this[this.length>>1] = Bytes.rctab[this[this.length>>1]];
|
this[this.length>>1] = Bytes.rctab[this[this.length>>1]];
|
||||||
}
|
}
|
||||||
|
|
||||||
var re_cigar = /(\d+)([MIDSHN])/g;
|
// create index for a list of intervals for fast interval queries; ported from bedidx.c in samtools
|
||||||
|
function intv_ovlp(intv, bits)
|
||||||
/******************************
|
|
||||||
*** Generate ALT alignment ***
|
|
||||||
******************************/
|
|
||||||
|
|
||||||
function intv_ovlp(intv, bits) // interval index
|
|
||||||
{
|
{
|
||||||
if (typeof bits == "undefined") bits = 13;
|
if (typeof bits == "undefined") bits = 13;
|
||||||
intv.sort(function(a,b) {return a[0]-b[0];});
|
intv.sort(function(a,b) {return a[0]-b[0];});
|
||||||
|
|
@ -142,7 +141,14 @@ function intv_ovlp(intv, bits) // interval index
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function cigar2pos(cigar, pos) // given a pos on ALT and the ALT-to-REF CIGAR, find the pos on REF
|
var re_cigar = /(\d+)([MIDSHN])/g;
|
||||||
|
|
||||||
|
/******************************
|
||||||
|
*** Generate ALT alignment ***
|
||||||
|
******************************/
|
||||||
|
|
||||||
|
// given a pos on ALT and the ALT-to-REF CIGAR, find the pos on REF
|
||||||
|
function cigar2pos(cigar, pos)
|
||||||
{
|
{
|
||||||
var x = 0, y = 0;
|
var x = 0, y = 0;
|
||||||
for (var i = 0; i < cigar.length; ++i) {
|
for (var i = 0; i < cigar.length; ++i) {
|
||||||
|
|
@ -166,7 +172,9 @@ function cigar2pos(cigar, pos) // given a pos on ALT and the ALT-to-REF CIGAR, f
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
function parse_hit(s, opt) // parse a hit. s looks something like ["chr1", "+12345", "100M", 5]
|
// Parse a hit. $s is an array that looks something like ["chr1", "+12345", "100M", 5]
|
||||||
|
// Return an object keeping various information about the alignment.
|
||||||
|
function parse_hit(s, opt)
|
||||||
{
|
{
|
||||||
var h = {};
|
var h = {};
|
||||||
h.ctg = s[0];
|
h.ctg = s[0];
|
||||||
|
|
@ -221,7 +229,7 @@ function read_ALT_sam(fn)
|
||||||
}
|
}
|
||||||
buf.destroy();
|
buf.destroy();
|
||||||
file.close();
|
file.close();
|
||||||
// create the interval index
|
// create index for intervals on ALT contigs
|
||||||
var idx = {};
|
var idx = {};
|
||||||
for (var ctg in intv)
|
for (var ctg in intv)
|
||||||
idx[ctg] = intv_ovlp(intv[ctg]);
|
idx[ctg] = intv_ovlp(intv[ctg]);
|
||||||
|
|
@ -270,20 +278,22 @@ function bwa_postalt(args)
|
||||||
|
|
||||||
var t = line.split("\t");
|
var t = line.split("\t");
|
||||||
t[1] = parseInt(t[1]); t[3] = parseInt(t[3]); t[4] = parseInt(t[4]);
|
t[1] = parseInt(t[1]); t[3] = parseInt(t[3]); t[4] = parseInt(t[4]);
|
||||||
if (buf2.length && buf2[0][0] != t[0]) {
|
|
||||||
|
// print bufferred reads
|
||||||
|
if (buf2.length && (buf2[0][0] != t[0] || (buf2[0][1]&0xc0) != (t[1]&0xc0))) {
|
||||||
for (var i = 0; i < buf2.length; ++i)
|
for (var i = 0; i < buf2.length; ++i)
|
||||||
print(buf2[i].join("\t"));
|
print(buf2[i].join("\t"));
|
||||||
buf2 = [];
|
buf2 = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((m = /\tXA:Z:(\S+)/.exec(line)) == null) { // TODO: this does not work with PE file
|
if ((m = /\tXA:Z:(\S+)/.exec(line)) == null) {
|
||||||
buf2.push(t);
|
buf2.push(t);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse hits
|
|
||||||
var hits = [];
|
|
||||||
var XA_strs = m[1].split(";");
|
var XA_strs = m[1].split(";");
|
||||||
|
|
||||||
|
// parse the reported hit
|
||||||
|
var hits = [];
|
||||||
var NM = (m = /\tNM:i:(\d+)/.exec(line)) == null? '0' : m[1];
|
var NM = (m = /\tNM:i:(\d+)/.exec(line)) == null? '0' : m[1];
|
||||||
var flag = t[1];
|
var flag = t[1];
|
||||||
var h = parse_hit([t[2], ((flag&16)?'-':'+') + t[3], t[5], NM], opt);
|
var h = parse_hit([t[2], ((flag&16)?'-':'+') + t[3], t[5], NM], opt);
|
||||||
|
|
@ -292,11 +302,13 @@ function bwa_postalt(args)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
hits.push(h);
|
hits.push(h);
|
||||||
for (var i = 0; i < XA_strs.length; ++i) // hits in the XA tag
|
|
||||||
|
// parse hits in the XA tag
|
||||||
|
for (var i = 0; i < XA_strs.length; ++i)
|
||||||
if (XA_strs[i] != '') // as the last symbol in an XA tag is ";", the last split is an empty string
|
if (XA_strs[i] != '') // as the last symbol in an XA tag is ";", the last split is an empty string
|
||||||
hits.push(parse_hit(XA_strs[i].split(","), opt));
|
hits.push(parse_hit(XA_strs[i].split(","), opt));
|
||||||
|
|
||||||
// lift mapping positions to coordinates on the primary assembly
|
// lift mapping positions to the primary assembly
|
||||||
var n_lifted = 0, n_rpt_lifted = 0;
|
var n_lifted = 0, n_rpt_lifted = 0;
|
||||||
for (var i = 0; i < hits.length; ++i) {
|
for (var i = 0; i < hits.length; ++i) {
|
||||||
var h = hits[i];
|
var h = hits[i];
|
||||||
|
|
@ -328,7 +340,7 @@ function bwa_postalt(args)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// group hits
|
// group hits based on the lifted positions on the primary assembly
|
||||||
for (var i = 0; i < hits.length; ++i) { // set keys for sorting
|
for (var i = 0; i < hits.length; ++i) { // set keys for sorting
|
||||||
if (hits[i].lifted && hits[i].lifted.length) // TODO: only the first element in lifted[] is used
|
if (hits[i].lifted && hits[i].lifted.length) // TODO: only the first element in lifted[] is used
|
||||||
hits[i].pctg = hits[i].lifted[0][0], hits[i].pstart = hits[i].lifted[0][2], hits[i].pend = hits[i].lifted[0][3];
|
hits[i].pctg = hits[i].lifted[0][0], hits[i].pstart = hits[i].lifted[0][2], hits[i].pend = hits[i].lifted[0][3];
|
||||||
|
|
@ -352,8 +364,9 @@ function bwa_postalt(args)
|
||||||
if (hits[i].g == reported_g)
|
if (hits[i].g == reported_g)
|
||||||
++n_group0;
|
++n_group0;
|
||||||
|
|
||||||
|
// re-estimate mapping quality if necessary
|
||||||
var mapQ, ori_mapQ = t[4];
|
var mapQ, ori_mapQ = t[4];
|
||||||
if (n_group0 > 1) { // then re-estimate mapQ
|
if (n_group0 > 1) {
|
||||||
var group_max = [];
|
var group_max = [];
|
||||||
for (var i = 0; i < hits.length; ++i) {
|
for (var i = 0; i < hits.length; ++i) {
|
||||||
var g = hits[i].g;
|
var g = hits[i].g;
|
||||||
|
|
@ -414,11 +427,13 @@ function bwa_postalt(args)
|
||||||
aux.set(t[10],0); aux.reverse(); rq = aux.toString();
|
aux.set(t[10],0); aux.reverse(); rq = aux.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
// print
|
// stage the reported hit
|
||||||
t[4] = mapQ;
|
t[4] = mapQ;
|
||||||
if (n_group0 > 1) t.push("om:i:"+ori_mapQ);
|
if (n_group0 > 1) t.push("om:i:"+ori_mapQ);
|
||||||
if (hits[reported_i].lifted_str) t.push("lt:Z:" + hits[reported_i].lifted_str);
|
if (hits[reported_i].lifted_str) t.push("lt:Z:" + hits[reported_i].lifted_str);
|
||||||
buf2.push(t);
|
buf2.push(t);
|
||||||
|
|
||||||
|
// stage the hits generated from the XA tag
|
||||||
var cnt = 0;
|
var cnt = 0;
|
||||||
for (var i = 0; i < hits.length; ++i) {
|
for (var i = 0; i < hits.length; ++i) {
|
||||||
if (opt.verbose >= 5) print(obj2str(hits[i]));
|
if (opt.verbose >= 5) print(obj2str(hits[i]));
|
||||||
|
|
|
||||||
4
bwamem.c
4
bwamem.c
|
|
@ -858,7 +858,7 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
|
||||||
kputsn("*\t*", 3, str);
|
kputsn("*\t*", 3, str);
|
||||||
} else if (!p->is_rev) { // the forward strand
|
} else if (!p->is_rev) { // the forward strand
|
||||||
int i, qb = 0, qe = s->l_seq;
|
int i, qb = 0, qe = s->l_seq;
|
||||||
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) { // have cigar && not the primary alignment && not softclip all
|
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP) && !p->is_alt) { // have cigar && not the primary alignment && not softclip all
|
||||||
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4;
|
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4;
|
||||||
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4;
|
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4;
|
||||||
}
|
}
|
||||||
|
|
@ -872,7 +872,7 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
|
||||||
} else kputc('*', str);
|
} else kputc('*', str);
|
||||||
} else { // the reverse strand
|
} else { // the reverse strand
|
||||||
int i, qb = 0, qe = s->l_seq;
|
int i, qb = 0, qe = s->l_seq;
|
||||||
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) {
|
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP) && !p->is_alt) {
|
||||||
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4;
|
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4;
|
||||||
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4;
|
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
11
fastmap.c
11
fastmap.c
|
|
@ -10,8 +10,8 @@
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "bntseq.h"
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
#include "utils.h"
|
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
extern unsigned char nst_nt4_table[256];
|
||||||
|
|
@ -38,7 +38,7 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
|
||||||
int main_mem(int argc, char *argv[])
|
int main_mem(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
mem_opt_t *opt, opt0;
|
mem_opt_t *opt, opt0;
|
||||||
int fd, fd2, i, c, n, copy_comment = 0;
|
int fd, fd2, i, c, n, copy_comment = 0, ignore_alt = 0;
|
||||||
int fixed_chunk_size = -1, actual_chunk_size;
|
int fixed_chunk_size = -1, actual_chunk_size;
|
||||||
gzFile fp, fp2 = 0;
|
gzFile fp, fp2 = 0;
|
||||||
kseq_t *ks, *ks2 = 0;
|
kseq_t *ks, *ks2 = 0;
|
||||||
|
|
@ -55,7 +55,7 @@ int main_mem(int argc, char *argv[])
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
memset(&opt0, 0, sizeof(mem_opt_t));
|
memset(&opt0, 0, sizeof(mem_opt_t));
|
||||||
while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:g:")) >= 0) {
|
while ((c = getopt(argc, argv, "epaFMCSPHVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:g:")) >= 0) {
|
||||||
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
||||||
else if (c == 'x') mode = optarg;
|
else if (c == 'x') mode = optarg;
|
||||||
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
|
||||||
|
|
@ -76,6 +76,7 @@ int main_mem(int argc, char *argv[])
|
||||||
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
|
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
|
||||||
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
|
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
|
||||||
else if (c == 'v') bwa_verbose = atoi(optarg);
|
else if (c == 'v') bwa_verbose = atoi(optarg);
|
||||||
|
else if (c == 'j') ignore_alt = 1;
|
||||||
else if (c == 'r') opt->split_factor = atof(optarg), opt0.split_factor = 1.;
|
else if (c == 'r') opt->split_factor = atof(optarg), opt0.split_factor = 1.;
|
||||||
else if (c == 'D') opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.;
|
else if (c == 'D') opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.;
|
||||||
else if (c == 'm') opt->max_matesw = atoi(optarg), opt0.max_matesw = 1;
|
else if (c == 'm') opt->max_matesw = atoi(optarg), opt0.max_matesw = 1;
|
||||||
|
|
@ -168,6 +169,7 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, "\nInput/output options:\n\n");
|
fprintf(stderr, "\nInput/output options:\n\n");
|
||||||
fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n");
|
fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n");
|
||||||
fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
|
fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
|
||||||
|
fprintf(stderr, " -j ignore ALT contigs\n");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose);
|
fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose);
|
||||||
fprintf(stderr, " -g FLOAT set mapQ to zero if the ratio of the primary-to-alt scores below FLOAT [%.3f]\n", opt->min_pa_ratio);
|
fprintf(stderr, " -g FLOAT set mapQ to zero if the ratio of the primary-to-alt scores below FLOAT [%.3f]\n", opt->min_pa_ratio);
|
||||||
|
|
@ -229,6 +231,9 @@ int main_mem(int argc, char *argv[])
|
||||||
bwa_fill_scmat(opt->a, opt->b, opt->mat);
|
bwa_fill_scmat(opt->a, opt->b, opt->mat);
|
||||||
|
|
||||||
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
|
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
|
||||||
|
if (ignore_alt)
|
||||||
|
for (i = 0; i < idx->bns->n_seqs; ++i)
|
||||||
|
idx->bns->anns[i].is_alt = 0;
|
||||||
|
|
||||||
ko = kopen(argv[optind + 1], &fd);
|
ko = kopen(argv[optind + 1], &fd);
|
||||||
if (ko == 0) {
|
if (ko == 0) {
|
||||||
|
|
|
||||||
2
main.c
2
main.c
|
|
@ -4,7 +4,7 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#ifndef PACKAGE_VERSION
|
#ifndef PACKAGE_VERSION
|
||||||
#define PACKAGE_VERSION "0.7.10-r868-dirty"
|
#define PACKAGE_VERSION "0.7.10-r876-dirty"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int bwa_fa2pac(int argc, char *argv[]);
|
int bwa_fa2pac(int argc, char *argv[]);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue