Merge branch 'dev'
This commit is contained in:
commit
dbe74ca9b8
|
|
@ -156,14 +156,16 @@ have ignored these important genes.
|
|||
We recommend including the genomic regions of classical HLA genes in the BWA
|
||||
index. This way we will be able to get a more complete collection of reads
|
||||
mapped to HLA. We can then isolate these reads with little computational cost
|
||||
and type HLA genes with another program, such as [Dilthey et al (2014)][hla1] or
|
||||
one from [this list][hlatools].
|
||||
and type HLA genes with another program, such as [Warren et al (2012)][hla4],
|
||||
[Liu et al (2013)][hla2], [Bai et al (2014)][hla3], [Dilthey et al (2014)][hla1]
|
||||
or others from [this list][hlatools].
|
||||
|
||||
If the postprocessing script `bwa-postalt.js` is invoked with `-p prefix`, it
|
||||
will also write the top three alleles to file `prefix.hla`. However, as most HLA
|
||||
alleles from IMGT/HLA don't have intronic sequences and thus are not included in
|
||||
the reference genome, we are unable to type HLA genes to high resolution with
|
||||
the BWA-MEM mapping alone. A dedicated tool is recommended for accurate typing.
|
||||
the BWA index from option 2, we are unable to type HLA genes to high resolution
|
||||
with the BWA-MEM mapping alone. A dedicated tool is recommended for accurate
|
||||
typing.
|
||||
|
||||
### Evaluating ALT Mapping
|
||||
|
||||
|
|
@ -194,3 +196,6 @@ can even get rid of ALT contigs for good.
|
|||
[hla1]: http://biorxiv.org/content/early/2014/07/08/006973
|
||||
[hlalink]: http://www.hladiseaseassociations.com
|
||||
[hlatools]: https://www.biostars.org/p/93245/
|
||||
[hla2]: http://nar.oxfordjournals.org/content/41/14/e142.full.pdf+html
|
||||
[hla3]: http://www.biomedcentral.com/1471-2164/15/325
|
||||
[hla4]: http://genomemedicine.com/content/4/12/95
|
||||
|
|
|
|||
113
bwa-postalt.js
113
bwa-postalt.js
|
|
@ -203,42 +203,18 @@ function parse_hit(s, opt)
|
|||
return h;
|
||||
}
|
||||
|
||||
function type_hla(w)
|
||||
function print_buffer(buf2, fp_hla, hla)
|
||||
{
|
||||
var hla = ["A", "B", "C", "DQA1", "DQB1", "DRB1"];
|
||||
var hla_hash = {}, a = [], r = [];
|
||||
for (var i = 0; i < hla.length; ++i) {
|
||||
hla_hash[hla[i]] = i;
|
||||
a[i] = [];
|
||||
}
|
||||
for (var i = 0; i < w.length; ++i) {
|
||||
var t = w[i][0].split(/[:\*]/);
|
||||
var x = hla_hash[t[0]];
|
||||
if (x != null)
|
||||
a[x].push([w[i][0], t[1], t[1] + ':' + t[2], w[i][1]]);
|
||||
}
|
||||
for (var k = 1; k <= 2; ++k) {
|
||||
for (var i = 0; i < hla.length; ++i) {
|
||||
var ai = a[i], m = {};
|
||||
for (var j = 0; j < ai.length; ++j) {
|
||||
var key = ai[j][k], val = ai[j][3];
|
||||
if (m[key] == null) m[key] = [-1, -1.0];
|
||||
if (m[key][1] < val) m[key] = [j, val];
|
||||
}
|
||||
var sum = 0;
|
||||
for (var x in m) sum += m[x][1];
|
||||
var max = -1, max2 = -1, max3 = -1, max_x, max_x2, max_x3;
|
||||
for (var x in m) {
|
||||
if (max < m[x][1]) max3 = max2, max_x3 = max_x2, max2 = max, max_x2 = max_x, max = m[x][1], max_x = x;
|
||||
else if (max2 < m[x][1]) max3 = max2, max_x3 = max_x2, max2 = m[x][1], max_x2 = x;
|
||||
else if (max3 < m[x][1]) max3 = m[x][1], max_x3 = x;
|
||||
}
|
||||
r.push([hla[i], k, hla[i]+'*'+max_x, max.toFixed(3), hla[i]+'*'+max_x2, max2.toFixed(3), hla[i]+'*'+max_x3, max3.toFixed(3)]);
|
||||
if (buf2.length == 0) return;
|
||||
for (var i = 0; i < buf2.length; ++i)
|
||||
print(buf2[i].join("\t"));
|
||||
if (fp_hla != null) {
|
||||
var name = buf2[0][0] + '/' + (buf2[0][1]>>6&3) + ((buf2[0][1]&16)? '-' : '+');
|
||||
for (var x in hla) {
|
||||
if (fp_hla[x] != null);
|
||||
fp_hla[x].write('@' + name + '\n' + buf2[0][9] + '\n+\n' + buf2[0][10] + '\n');
|
||||
}
|
||||
}
|
||||
for (var i = 0; i < r.length; ++i)
|
||||
print(r[i].join("\t"));
|
||||
return r;
|
||||
}
|
||||
|
||||
function bwa_postalt(args)
|
||||
|
|
@ -285,7 +261,7 @@ function bwa_postalt(args)
|
|||
var buf = new Bytes();
|
||||
|
||||
// read ALT-to-REF alignment
|
||||
var intv_alt = {}, intv_pri = {}, idx_un = {};
|
||||
var intv_alt = {}, intv_pri = {}, idx_un = {}, hla_ctg = {};
|
||||
var file = new File(args[getopt.ind]);
|
||||
while (file.readline(buf) >= 0) {
|
||||
var line = buf.toString();
|
||||
|
|
@ -299,6 +275,10 @@ function bwa_postalt(args)
|
|||
continue;
|
||||
}
|
||||
var m, cigar = [], l_qaln = 0, l_tlen = 0, l_qclip = 0;
|
||||
if ((m = /^(HLA-[^\s\*]+)\*\d+/.exec(t[0])) != null) { // read HLA contigs
|
||||
if (hla_ctg[m[1]] == null) hla_ctg[m[1]] = 0;
|
||||
++hla_ctg[m[1]];
|
||||
}
|
||||
while ((m = re_cigar.exec(t[5])) != null) {
|
||||
var l = parseInt(m[1]);
|
||||
cigar.push([m[2] != 'H'? m[2] : 'S', l]); // convert hard clip to soft clip
|
||||
|
|
@ -321,6 +301,14 @@ function bwa_postalt(args)
|
|||
for (var ctg in intv_pri)
|
||||
idx_pri[ctg] = intv_ovlp(intv_pri[ctg]);
|
||||
|
||||
// initialize the list of HLA contigs
|
||||
var fp_hla = null;
|
||||
if (opt.pre) {
|
||||
fp_hla = {};
|
||||
for (var h in hla_ctg)
|
||||
fp_hla[h] = new File(opt.pre + '.' + h + '.fq', "w");
|
||||
}
|
||||
|
||||
// initialize the list of ALT contigs
|
||||
var weight_alt = [];
|
||||
for (var ctg in idx_alt)
|
||||
|
|
@ -329,7 +317,7 @@ function bwa_postalt(args)
|
|||
weight_alt[ctg] = [0, 0, 0, 0, 0, 0, '~', 0, 0];
|
||||
|
||||
// process SAM
|
||||
var buf2 = [];
|
||||
var buf2 = [], hla = {};
|
||||
file = args.length - getopt.ind >= 2? new File(args[getopt.ind+1]) : new File();
|
||||
while (file.readline(buf) >= 0) {
|
||||
var m, line = buf.toString();
|
||||
|
|
@ -343,9 +331,8 @@ function bwa_postalt(args)
|
|||
|
||||
// print buffered reads
|
||||
if (buf2.length && (buf2[0][0] != t[0] || (buf2[0][1]&0xc0) != (t[1]&0xc0))) {
|
||||
for (var i = 0; i < buf2.length; ++i)
|
||||
print(buf2[i].join("\t"));
|
||||
buf2 = [];
|
||||
print_buffer(buf2, fp_hla, hla);
|
||||
buf2 = [], hla = {};
|
||||
}
|
||||
|
||||
// skip unmapped lines
|
||||
|
|
@ -468,27 +455,35 @@ function bwa_postalt(args)
|
|||
else mapQ = mapQ > ori_mapQ? mapQ : ori_mapQ;
|
||||
} else mapQ = t[4];
|
||||
|
||||
var pri_ofunc = idx_pri[hits[reported_i].pctg], ovlp_alt = [];
|
||||
if (pri_ofunc != null) {
|
||||
var rpt_start = 1<<30, rpt_end = 0;
|
||||
for (var i = 0; i < hits.length; ++i) {
|
||||
var h = hits[i];
|
||||
if (h.g == reported_g) {
|
||||
rpt_start = rpt_start < h.pstart? rpt_start : h.pstart;
|
||||
rpt_end = rpt_end > h.pend ? rpt_end : h.pend;
|
||||
}
|
||||
}
|
||||
ovlp_alt = pri_ofunc(rpt_start, rpt_end);
|
||||
for (var i = 0; i < ovlp_alt.length; ++i)
|
||||
if ((m = /^(HLA-[^\s\*]+)\*\d+/.exec(ovlp_alt[i][2])) != null)
|
||||
hla[m[1]] = true;
|
||||
}
|
||||
|
||||
// ALT genotyping
|
||||
if (mapQ >= opt.min_mapq && hits[reported_i].score >= opt.min_sc) {
|
||||
// collect all overlapping ALT contigs
|
||||
var hits2 = [];
|
||||
var alts = {};
|
||||
for (var i = 0; i < hits.length; ++i) {
|
||||
var h = hits[i];
|
||||
if (h.g == reported_g)
|
||||
hits2.push([h.pctg, h.pstart, h.pend, h.ctg, h.score, h.NM]);
|
||||
if (h.g == reported_g && weight_alt[h.ctg] != null)
|
||||
alts[h.ctg] = [h.score, h.NM];
|
||||
}
|
||||
var start = hits2[0][1], end = hits2[0][2];
|
||||
for (var i = 1; i < hits2.length; ++i)
|
||||
end = end > hits2[i][2]? end : hits2[i][2];
|
||||
var alts = {};
|
||||
for (var i = 0; i < hits2.length; ++i)
|
||||
if (weight_alt[hits2[i][3]] != null)
|
||||
alts[hits2[i][3]] = [hits2[i][4], hits2[i][5]];
|
||||
if (idx_pri[hits2[0][0]] != null) { // add other unreported hits
|
||||
var ovlp = idx_pri[hits2[0][0]](start, end);
|
||||
for (var i = 0; i < ovlp.length; ++i)
|
||||
if (ovlp[i][0] <= start && end <= ovlp[i][1] && alts[ovlp[i][2]] == null)
|
||||
alts[ovlp[i][2]] = [0, 0];
|
||||
if (ovlp_alt.length > 0) { // add other unreported hits
|
||||
for (var i = 0; i < ovlp_alt.length; ++i)
|
||||
if (ovlp_alt[i][0] <= rpt_start && rpt_end <= ovlp_alt[i][1] && alts[ovlp_alt[i][2]] == null)
|
||||
alts[ovlp_alt[i][2]] = [0, 0];
|
||||
}
|
||||
|
||||
// add weight to each ALT contig
|
||||
|
|
@ -600,10 +595,12 @@ function bwa_postalt(args)
|
|||
buf2.push(s);
|
||||
}
|
||||
}
|
||||
for (var i = 0; i < buf2.length; ++i)
|
||||
print(buf2[i].join("\t"));
|
||||
print_buffer(buf2, fp_hla, hla);
|
||||
file.close();
|
||||
if (fp_evi != null) fp_evi.close();
|
||||
if (fp_hla != null)
|
||||
for (var h in fp_hla)
|
||||
fp_hla[h].close();
|
||||
|
||||
buf.destroy();
|
||||
aux.destroy();
|
||||
|
|
@ -628,12 +625,6 @@ function bwa_postalt(args)
|
|||
fpout.write(weight_arr[i].join("\t") + '\n');
|
||||
}
|
||||
fpout.close();
|
||||
|
||||
var r = type_hla(weight_hla);
|
||||
fpout = new File(opt.pre + '.hla', "w");
|
||||
for (var i = 0; i < r.length; ++i)
|
||||
fpout.write(r[i].join("\t") + '\n');
|
||||
fpout.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue