code backup
This commit is contained in:
parent
a03d01f944
commit
d802cfb7f5
|
|
@ -52,7 +52,7 @@ var getopt = function(args, ostr) {
|
||||||
return optopt;
|
return optopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// print an object in a format similar to JSON. For debugging.
|
// print an object in a format similar to JSON. For debugging only.
|
||||||
function obj2str(o)
|
function obj2str(o)
|
||||||
{
|
{
|
||||||
if (typeof(o) != 'object') {
|
if (typeof(o) != 'object') {
|
||||||
|
|
@ -206,34 +206,39 @@ function parse_hit(s, opt)
|
||||||
// read the ALT-to-REF alignment and generate the index
|
// read the ALT-to-REF alignment and generate the index
|
||||||
function read_ALT_sam(fn)
|
function read_ALT_sam(fn)
|
||||||
{
|
{
|
||||||
var intv = {};
|
var intv_alt = {}, intv_pri = {};
|
||||||
var file = new File(fn);
|
var file = new File(fn);
|
||||||
var buf = new Bytes();
|
var buf = new Bytes();
|
||||||
while (file.readline(buf) >= 0) {
|
while (file.readline(buf) >= 0) {
|
||||||
var line = buf.toString();
|
var line = buf.toString();
|
||||||
var t = line.split("\t");
|
var t = line.split("\t");
|
||||||
if (line.charAt(0) == '@') continue;
|
if (line.charAt(0) == '@') continue;
|
||||||
|
var pos = parseInt(t[3]) - 1;
|
||||||
var flag = parseInt(t[1]);
|
var flag = parseInt(t[1]);
|
||||||
var m, cigar = [], l_qaln = 0, l_qclip = 0;
|
var m, cigar = [], l_qaln = 0, l_tlen = 0, l_qclip = 0;
|
||||||
while ((m = re_cigar.exec(t[5])) != null) {
|
while ((m = re_cigar.exec(t[5])) != null) {
|
||||||
var l = parseInt(m[1]);
|
var l = parseInt(m[1]);
|
||||||
cigar.push([m[2] != 'H'? m[2] : 'S', l]); // convert hard clip to soft clip
|
cigar.push([m[2] != 'H'? m[2] : 'S', l]); // convert hard clip to soft clip
|
||||||
if (m[2] == 'M' || m[2] == 'I') l_qaln += l;
|
if (m[2] == 'M' || m[2] == 'I') l_qaln += l, l_tlen += l;
|
||||||
else if (m[2] == 'S' || m[2] == 'H') l_qclip += l;
|
else if (m[2] == 'S' || m[2] == 'H') l_qclip += l;
|
||||||
|
else if (m[2] == 'D' || m[2] == 'N') l_tlen += l;
|
||||||
}
|
}
|
||||||
var j = flag&16? cigar.length-1 : 0;
|
var j = flag&16? cigar.length-1 : 0;
|
||||||
var start = cigar[j][0] == 'S'? cigar[j][1] : 0;
|
var start = cigar[j][0] == 'S'? cigar[j][1] : 0;
|
||||||
if (intv[t[0]] == null) intv[t[0]] = [];
|
if (intv_alt[t[0]] == null) intv_alt[t[0]] = [];
|
||||||
intv[t[0]].push([start, start + l_qaln, l_qaln + l_qclip, t[2], flag&16? true : false, parseInt(t[3]) - 1, cigar]);
|
intv_alt[t[0]].push([start, start + l_qaln, l_qaln + l_qclip, t[2], flag&16? true : false, parseInt(t[3]) - 1, cigar]);
|
||||||
//print(start, start + l_qaln, t[2], flag&16? true : false, parseInt(t[3]), cigar);
|
if (intv_pri[t[2]] == null) intv_pri[t[2]] = [];
|
||||||
|
intv_pri[t[2]].push([pos, pos + l_tlen, t[0]]);
|
||||||
}
|
}
|
||||||
buf.destroy();
|
buf.destroy();
|
||||||
file.close();
|
file.close();
|
||||||
// create index for intervals on ALT contigs
|
// create index for intervals on ALT contigs
|
||||||
var idx = {};
|
var idx_alt = {}, idx_pri = {};
|
||||||
for (var ctg in intv)
|
for (var ctg in intv_alt)
|
||||||
idx[ctg] = intv_ovlp(intv[ctg]);
|
idx_alt[ctg] = intv_ovlp(intv_alt[ctg]);
|
||||||
return idx;
|
for (var ctg in intv_pri)
|
||||||
|
idx_pri[ctg] = intv_ovlp(intv_pri[ctg]);
|
||||||
|
return [idx_alt, idx_pri];
|
||||||
}
|
}
|
||||||
|
|
||||||
function bwa_postalt(args)
|
function bwa_postalt(args)
|
||||||
|
|
@ -264,8 +269,9 @@ function bwa_postalt(args)
|
||||||
|
|
||||||
var file, buf = new Bytes();
|
var file, buf = new Bytes();
|
||||||
var aux = new Bytes(); // used for reverse and reverse complement
|
var aux = new Bytes(); // used for reverse and reverse complement
|
||||||
var idx = read_ALT_sam(args[getopt.ind]);
|
var idx_pair = read_ALT_sam(args[getopt.ind]);
|
||||||
var buf2 = [];
|
var idx_alt = idx_pair[0], idx_pri = idx_pair[1];
|
||||||
|
var buf2 = [], buf3 = [];
|
||||||
|
|
||||||
// process SAM
|
// process SAM
|
||||||
file = args.length - getopt.ind >= 2? new File(args[getopt.ind+1]) : new File();
|
file = args.length - getopt.ind >= 2? new File(args[getopt.ind+1]) : new File();
|
||||||
|
|
@ -286,6 +292,21 @@ function bwa_postalt(args)
|
||||||
buf2 = [];
|
buf2 = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test primary and if so whether it overlaps with ALT regions
|
||||||
|
if (idx_pri[t[2]] != null) {
|
||||||
|
var start = t[3], end = start;
|
||||||
|
while ((m = re_cigar.exec(t[5])) != null)
|
||||||
|
if (m[2] == 'M' || m[2] == 'D' || m[2] == 'N')
|
||||||
|
end += parseInt(m[1]);
|
||||||
|
var ovlp = idx_pri[t[2]](start, end);
|
||||||
|
if (ovlp.length > 0) {
|
||||||
|
var score = (m = /\tAS:i:(\d+)/.exec(line)) != null? parseInt(m[1]) : 1;
|
||||||
|
for (var i = 0; i < ovlp.length; ++i)
|
||||||
|
buf3.push([t[2], start, end, ovlp[i][2]]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parse the XA tag
|
||||||
if ((m = /\tXA:Z:(\S+)/.exec(line)) == null) {
|
if ((m = /\tXA:Z:(\S+)/.exec(line)) == null) {
|
||||||
buf2.push(t);
|
buf2.push(t);
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -309,12 +330,12 @@ function bwa_postalt(args)
|
||||||
hits.push(parse_hit(XA_strs[i].split(","), opt));
|
hits.push(parse_hit(XA_strs[i].split(","), opt));
|
||||||
|
|
||||||
// lift mapping positions to the primary assembly
|
// lift mapping positions to the primary assembly
|
||||||
var n_lifted = 0, n_rpt_lifted = 0;
|
var n_lifted = 0, n_rpt_lifted = 0, rpt_lifted = null;
|
||||||
for (var i = 0; i < hits.length; ++i) {
|
for (var i = 0; i < hits.length; ++i) {
|
||||||
var h = hits[i];
|
var h = hits[i];
|
||||||
|
|
||||||
if (idx[h.ctg] == null) continue;
|
if (idx_alt[h.ctg] == null) continue;
|
||||||
var a = idx[h.ctg](h.start, h.end);
|
var a = idx_alt[h.ctg](h.start, h.end);
|
||||||
if (a == null || a.length == 0) continue;
|
if (a == null || a.length == 0) continue;
|
||||||
|
|
||||||
// find the approximate position on the primary assembly
|
// find the approximate position on the primary assembly
|
||||||
|
|
@ -333,6 +354,7 @@ function bwa_postalt(args)
|
||||||
lifted.push([a[j][3], (h.rev!=a[j][4]), s, e]);
|
lifted.push([a[j][3], (h.rev!=a[j][4]), s, e]);
|
||||||
if (i == 0) ++n_rpt_lifted;
|
if (i == 0) ++n_rpt_lifted;
|
||||||
}
|
}
|
||||||
|
if (i == 0 && n_rpt_lifted == 1) rpt_lifted = lifted[0].slice(0);
|
||||||
if (lifted.length) ++n_lifted, hits[i].lifted = lifted;
|
if (lifted.length) ++n_lifted, hits[i].lifted = lifted;
|
||||||
}
|
}
|
||||||
if (n_lifted == 0) {
|
if (n_lifted == 0) {
|
||||||
|
|
@ -384,7 +406,7 @@ function bwa_postalt(args)
|
||||||
|
|
||||||
// check if the reported hit overlaps a hit to the primary assembly; if so, don't reduce mapping quality
|
// check if the reported hit overlaps a hit to the primary assembly; if so, don't reduce mapping quality
|
||||||
if (opt.recover_mapq && n_rpt_lifted == 1 && mapQ > 0) {
|
if (opt.recover_mapq && n_rpt_lifted == 1 && mapQ > 0) {
|
||||||
var l = lifted[0];
|
var l = rpt_lifted;
|
||||||
for (var i = 0; i < buf2.length; ++i) {
|
for (var i = 0; i < buf2.length; ++i) {
|
||||||
var s = buf2[i];
|
var s = buf2[i];
|
||||||
if (l[0] != s[2]) continue; // different chr
|
if (l[0] != s[2]) continue; // different chr
|
||||||
|
|
@ -423,6 +445,7 @@ function bwa_postalt(args)
|
||||||
need_rev = true;
|
need_rev = true;
|
||||||
}
|
}
|
||||||
if (need_rev) { // reverse and reverse complement
|
if (need_rev) { // reverse and reverse complement
|
||||||
|
aux.length = 0;
|
||||||
aux.set(t[9], 0); aux.revcomp(); rs = aux.toString();
|
aux.set(t[9], 0); aux.revcomp(); rs = aux.toString();
|
||||||
aux.set(t[10],0); aux.reverse(); rq = aux.toString();
|
aux.set(t[10],0); aux.reverse(); rq = aux.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -438,14 +461,16 @@ function bwa_postalt(args)
|
||||||
for (var i = 0; i < hits.length; ++i) {
|
for (var i = 0; i < hits.length; ++i) {
|
||||||
if (opt.verbose >= 5) print(obj2str(hits[i]));
|
if (opt.verbose >= 5) print(obj2str(hits[i]));
|
||||||
if (hits[i].g != reported_g || i == reported_i) continue;
|
if (hits[i].g != reported_g || i == reported_i) continue;
|
||||||
if (!opt.show_pri && idx[hits[i].ctg] == null) continue;
|
if (!opt.show_pri && idx_alt[hits[i].ctg] == null) continue;
|
||||||
var s = [t[0], flag&0xf10, hits[i].ctg, hits[i].start+1, mapQ, hits[i].cigar, '*', 0, 0];
|
var s = [t[0], 0, hits[i].ctg, hits[i].start+1, mapQ, hits[i].cigar, '*', 0, 0];
|
||||||
// update name
|
// print sequence/quality and set the rev flag
|
||||||
if (flag&0x40) s[0] += "/1";
|
if (hits[i].rev == hits[reported_i].rev) {
|
||||||
if (flag&0x80) s[0] += "/2";
|
s.push(t[9], t[10]);
|
||||||
s[0] += "_" + (++cnt);
|
s[1] = flag & 0x10;
|
||||||
if (hits[i].rev == hits[reported_i].rev) s.push(t[9], t[10]);
|
} else {
|
||||||
else s.push(rs, rq);
|
s.push(rs, rq);
|
||||||
|
s[1] = (flag & 0x10) ^ 0x10;
|
||||||
|
}
|
||||||
s.push("NM:i:" + hits[i].NM);
|
s.push("NM:i:" + hits[i].NM);
|
||||||
if (hits[i].lifted_str) s.push("lt:Z:" + hits[i].lifted_str);
|
if (hits[i].lifted_str) s.push("lt:Z:" + hits[i].lifted_str);
|
||||||
buf2.push(s);
|
buf2.push(s);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue