better CLI; more comments
This commit is contained in:
parent
4bc01e90c8
commit
40c4a6ffe1
|
|
@ -1,6 +1,72 @@
|
||||||
var fuzzy = 3, min_qal = 100, drop_thres = 7;
|
/*****************************************************************
|
||||||
|
* The K8 Javascript interpreter is required to run this script. *
|
||||||
|
* *
|
||||||
|
* Source code: https://github.com/attractivechaos/k8 *
|
||||||
|
* Binary: http://sourceforge.net/projects/lh3/files/k8/ *
|
||||||
|
*****************************************************************/
|
||||||
|
|
||||||
var file = arguments.length == 0? new File () : new File(arguments[0]);
|
var getopt = function(args, ostr) {
|
||||||
|
var oli; // option letter list index
|
||||||
|
if (typeof(getopt.place) == 'undefined')
|
||||||
|
getopt.ind = 0, getopt.arg = null, getopt.place = -1;
|
||||||
|
if (getopt.place == -1) { // update scanning pointer
|
||||||
|
if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
|
||||||
|
getopt.place = -1;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
|
||||||
|
++getopt.ind;
|
||||||
|
getopt.place = -1;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
|
||||||
|
if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
|
||||||
|
if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null.
|
||||||
|
if (getopt.place < 0) ++getopt.ind;
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
|
||||||
|
getopt.arg = null;
|
||||||
|
if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
|
||||||
|
} else { // need an argument
|
||||||
|
if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
|
||||||
|
getopt.arg = args[getopt.ind].substr(getopt.place);
|
||||||
|
else if (args.length <= ++getopt.ind) { // no arg
|
||||||
|
getopt.place = -1;
|
||||||
|
if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
|
||||||
|
return '?';
|
||||||
|
} else getopt.arg = args[getopt.ind]; // white space
|
||||||
|
getopt.place = -1;
|
||||||
|
++getopt.ind;
|
||||||
|
}
|
||||||
|
return optopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*********************
|
||||||
|
*** Main function ***
|
||||||
|
*********************/
|
||||||
|
|
||||||
|
var c, fuzzy = 1, min_qal = 100, drop_thres = 7;
|
||||||
|
|
||||||
|
// parse command line options
|
||||||
|
while ((c = getopt(arguments, "f:m:d:")) != null) {
|
||||||
|
if (c == 'f') fuzzy = parseInt(getopt.arg);
|
||||||
|
else if (c == 'm') min_qal = parseInt(getopt.arg);
|
||||||
|
else if (c == 'd') drop_thres = parseInt(getopt.arg);
|
||||||
|
}
|
||||||
|
if (arguments.length == getopt.ind) {
|
||||||
|
print("");
|
||||||
|
print("Usage: k8 bwa-typeHLA.js [options] <HLA-to-contig.sam>\n");
|
||||||
|
print("Options: -d INT drop a contig if the edit distance to the closest gene is >INT ["+drop_thres+"]");
|
||||||
|
print(" -m INT ignore a hit if the gene sequence in the alignment is <INT ["+min_qal+"]");
|
||||||
|
print(" -f INT output genotypes whose edit distance is within INT from the best ["+fuzzy+"]");
|
||||||
|
print("");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// read alignments
|
||||||
|
var file = new File(arguments[getopt.ind]);
|
||||||
var buf = new Bytes();
|
var buf = new Bytes();
|
||||||
var re_cigar = /(\d+)([MIDSH])/g;
|
var re_cigar = /(\d+)([MIDSH])/g;
|
||||||
|
|
||||||
|
|
@ -8,11 +74,13 @@ var len = {}, list = [], gcnt = [];
|
||||||
while (file.readline(buf) >= 0) {
|
while (file.readline(buf) >= 0) {
|
||||||
var m, mm, line = buf.toString();
|
var m, mm, line = buf.toString();
|
||||||
var t = line.split("\t");
|
var t = line.split("\t");
|
||||||
|
// SAM header
|
||||||
if (t[0].charAt(0) == '@') {
|
if (t[0].charAt(0) == '@') {
|
||||||
if (t[0] == '@SQ' && (m = /LN:(\d+)/.exec(line)) != null && (mm = /SN:(\S+)/.exec(line)) != null)
|
if (t[0] == '@SQ' && (m = /LN:(\d+)/.exec(line)) != null && (mm = /SN:(\S+)/.exec(line)) != null)
|
||||||
len[mm[1]] = parseInt(m[1]);
|
len[mm[1]] = parseInt(m[1]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// parse gene name and exon number
|
||||||
var gene = null, exon = null;
|
var gene = null, exon = null;
|
||||||
if ((m = /^(HLA-[^\s_]+)_(\d+)/.exec(t[0])) != null) {
|
if ((m = /^(HLA-[^\s_]+)_(\d+)/.exec(t[0])) != null) {
|
||||||
gene = m[1], exon = parseInt(m[2]) - 1;
|
gene = m[1], exon = parseInt(m[2]) - 1;
|
||||||
|
|
@ -20,6 +88,7 @@ while (file.readline(buf) >= 0) {
|
||||||
gcnt[exon][gene] = true;
|
gcnt[exon][gene] = true;
|
||||||
}
|
}
|
||||||
if (gene == null || exon == null || t[2] == '*') continue;
|
if (gene == null || exon == null || t[2] == '*') continue;
|
||||||
|
// parse clipping and aligned length
|
||||||
var x = 0, ts = parseInt(t[3]) - 1, te = ts, clip = [0, 0];
|
var x = 0, ts = parseInt(t[3]) - 1, te = ts, clip = [0, 0];
|
||||||
while ((m = re_cigar.exec(t[5])) != null) {
|
while ((m = re_cigar.exec(t[5])) != null) {
|
||||||
var l = parseInt(m[1]);
|
var l = parseInt(m[1]);
|
||||||
|
|
@ -33,16 +102,17 @@ while (file.readline(buf) >= 0) {
|
||||||
var left = ts < clip[0]? ts : clip[0];
|
var left = ts < clip[0]? ts : clip[0];
|
||||||
var right = tl - te < clip[1]? tl - te : clip[1];
|
var right = tl - te < clip[1]? tl - te : clip[1];
|
||||||
var nm = (m = /\tNM:i:(\d+)/.exec(line)) != null? parseInt(m[1]) : 0;
|
var nm = (m = /\tNM:i:(\d+)/.exec(line)) != null? parseInt(m[1]) : 0;
|
||||||
list.push([t[2], gene, exon, ts, te, nm, left + right]);
|
list.push([t[2], gene, exon, ts, te, nm, left + right]); // left+right should be 0 given a prefix-suffix alignment
|
||||||
}
|
}
|
||||||
|
|
||||||
buf.destroy();
|
buf.destroy();
|
||||||
file.close();
|
file.close();
|
||||||
|
|
||||||
// identify the primary exon(s)
|
// identify the primary exons, the exons associated with most genes
|
||||||
var pri_exon = [], n_pri_exons;
|
var pri_exon = [], n_pri_exons;
|
||||||
{
|
{
|
||||||
var cnt = [], max = 0;
|
var cnt = [], max = 0;
|
||||||
|
// count the number of genes per exon and track the max
|
||||||
for (var e = 0; e < gcnt.length; ++e) {
|
for (var e = 0; e < gcnt.length; ++e) {
|
||||||
if (gcnt[e] != null) {
|
if (gcnt[e] != null) {
|
||||||
var c = 0, h = gcnt[e];
|
var c = 0, h = gcnt[e];
|
||||||
|
|
@ -51,6 +121,7 @@ var pri_exon = [], n_pri_exons;
|
||||||
max = max > c? max : c;
|
max = max > c? max : c;
|
||||||
} else cnt[e] = 0;
|
} else cnt[e] = 0;
|
||||||
}
|
}
|
||||||
|
// find primary exons
|
||||||
var pri_list = [];
|
var pri_list = [];
|
||||||
for (var e = 0; e < cnt.length; ++e) {
|
for (var e = 0; e < cnt.length; ++e) {
|
||||||
if (cnt[e] == max) pri_list.push(e + 1);
|
if (cnt[e] == max) pri_list.push(e + 1);
|
||||||
|
|
@ -76,7 +147,7 @@ for (var i = 0; i < list.length; ++i) {
|
||||||
elist[g][list[i][2]] = true;
|
elist[g][list[i][2]] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// change to the per-exon representation
|
// reorganize hits to exons
|
||||||
var exons = [];
|
var exons = [];
|
||||||
for (var i = 0; i < list.length; ++i) {
|
for (var i = 0; i < list.length; ++i) {
|
||||||
var li = list[i];
|
var li = list[i];
|
||||||
|
|
@ -94,10 +165,9 @@ for (var i = 0; i < glist.length; ++i) {
|
||||||
|
|
||||||
// type each exon
|
// type each exon
|
||||||
for (var e = 0; e < exons.length; ++e) {
|
for (var e = 0; e < exons.length; ++e) {
|
||||||
//for (var e = 1; e < 3; ++e) {
|
|
||||||
if (exons[e] == null) continue;
|
if (exons[e] == null) continue;
|
||||||
var ee = exons[e];
|
var ee = exons[e];
|
||||||
// find good contigs and alleles that have good matches
|
// find contigs and genes associated with the current exon
|
||||||
var ch = {}, gh = {};
|
var ch = {}, gh = {};
|
||||||
for (var i = 0; i < ee.length; ++i) {
|
for (var i = 0; i < ee.length; ++i) {
|
||||||
if (elist[ee[i][1]][e] != null)
|
if (elist[ee[i][1]][e] != null)
|
||||||
|
|
@ -108,7 +178,7 @@ for (var e = 0; e < exons.length; ++e) {
|
||||||
for (var g in gh) ga.push(parseInt(g));
|
for (var g in gh) ga.push(parseInt(g));
|
||||||
var named_ca = [];
|
var named_ca = [];
|
||||||
for (var i = 0; i < ca.length; ++i) named_ca.push(clist[ca[i]]);
|
for (var i = 0; i < ca.length; ++i) named_ca.push(clist[ca[i]]);
|
||||||
warn("Processing exon "+(e+1)+" (" +ga.length+ " genes; " +ca.length+ " contigs: [" +named_ca.join(",")+ "])...");
|
warn("Processing exon "+(e+1)+" (" +ga.length+ " genes; " +ca.length+ " contigs: [" +named_ca.join(", ")+ "])...");
|
||||||
// convert representation again
|
// convert representation again
|
||||||
var sc = [];
|
var sc = [];
|
||||||
for (var i = 0; i < ee.length; ++i) {
|
for (var i = 0; i < ee.length; ++i) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue