This commit is contained in:
Heng Li 2014-11-04 10:07:14 -05:00
parent 4cbb0cb829
commit ace18171b0
2 changed files with 36 additions and 12 deletions

View File

@ -47,7 +47,7 @@ var getopt = function(args, ostr) {
*** Main function *** *** Main function ***
*********************/ *********************/
var c, fuzzy = 1, min_qal = 100, drop_thres = 7; var c, fuzzy = 1, min_qal = 100, drop_thres = 5;
// parse command line options // parse command line options
while ((c = getopt(arguments, "f:m:d:")) != null) { while ((c = getopt(arguments, "f:m:d:")) != null) {
@ -212,8 +212,11 @@ for (var e = 0; e < exons.length; ++e) {
var gj = ga[j], g2 = sc[gj], m = 0; var gj = ga[j], g2 = sc[gj], m = 0;
for (var k = 0; k < ca.length; ++k) { for (var k = 0; k < ca.length; ++k) {
var c = ca[k]; var c = ca[k];
if (!dropped[c]) if (!dropped[c]) {
m += g1[c] < g2[c]? g1[c] : g2[c]; m += g1[c] < g2[c]? g1[c] : g2[c];
if ((gi == 518 && gj == 653) || (gi == 653 && gj == 518))
print(e+1, clist[c], g1[c], g2[c]);
}
} }
var x = m<<20 | 1<<6 | pri_exon[e]; var x = m<<20 | 1<<6 | pri_exon[e];
if (gi < gj) pair[gj][gi] += x; if (gi < gj) pair[gj][gi] += x;
@ -223,6 +226,16 @@ for (var e = 0; e < exons.length; ++e) {
} }
} }
// extract the 3rd and 4th digits
var gsub = [], gsuf = [];
for (var i = 0; i < glist.length; ++i) {
var m = /^HLA-[^*\s]+\*\d+:(\d+).*([A-Z]?)$/.exec(glist[i]);
gsub[i] = parseInt(m[1]);
gsuf[i] = /[A-Z]$/.test(glist[i])? 1 : 0;
}
warn(ghash["HLA-DQB1*06:04:02"], ghash["HLA-DQB1*06:44"], pair[518][518].toString(16), pair[653][518].toString(16));
// genotyping // genotyping
var min_nm = 0x7fffffff; var min_nm = 0x7fffffff;
for (var i = 0; i < glist.length; ++i) for (var i = 0; i < glist.length; ++i)
@ -234,8 +247,8 @@ var out = [];
for (var i = 0; i < glist.length; ++i) for (var i = 0; i < glist.length; ++i)
for (var j = 0; j <= i; ++j) for (var j = 0; j <= i; ++j)
if ((pair[i][j]&63) == n_pri_exons && pair[i][j]>>20 <= min_nm + fuzzy) if ((pair[i][j]&63) == n_pri_exons && pair[i][j]>>20 <= min_nm + fuzzy)
out.push([pair[i][j]>>20, pair[i][j]>>6&63, i, j]); out.push([pair[i][j]>>20, pair[i][j]>>6&63, i, j, (gsuf[i] + gsuf[j])<<16|(gsub[i] + gsub[j])]);
out.sort(function(a, b) { return a[0]!=b[0]? a[0]-b[0] : b[1]-a[1]}); out.sort(function(a, b) { return a[0]!=b[0]? a[0]-b[0] : a[1]!=b[1]? b[1]-a[1] : a[4]!=b[4]? a[4]-b[4] : a[2]!=b[2]? a[2]-b[2] : a[3]-b[3]});
for (var i = 0; i < out.length; ++i) for (var i = 0; i < out.length; ++i)
print(glist[out[i][2]], glist[out[i][3]], out[i][0], out[i][1]); print(glist[out[i][2]], glist[out[i][3]], out[i][0], out[i][1]);

View File

@ -1,28 +1,39 @@
#!/bin/bash #!/bin/bash
is_ctg=0
if [ $# -gt 1 ] && [ $1 == '-A' ]; then
is_ctg=1
shift
fi
if [ $# -lt 2 ]; then if [ $# -lt 2 ]; then
echo "Usage: $0 <prefix> <gene>" echo "Usage: $0 [-A] <prefix> <gene>"
exit 1 exit 1
fi fi
root=`dirname $0` root=`dirname $0`
pre=$1.$2 pre=$1.$2
len=`$root/seqtk comp $pre.fq | awk '{++x;y+=$2}END{printf("%.0f\n", y/x)}'`
# de novo assembly # de novo assembly
$root/fermi2.pl unitig -t2 -l$len -p $pre.tmp $pre.fq > $pre.tmp.mak if [ $is_ctg -eq 0 ]; then
make -f $pre.tmp.mak len=`$root/seqtk comp $pre.fq | awk '{++x;y+=$2}END{printf("%.0f\n", y/x)}'`
$root/fermi2.pl unitig -t2 -l$len -p $pre.tmp $pre.fq > $pre.tmp.mak
make -f $pre.tmp.mak EXE_FERMI2=$root/fermi2 EXE_ROPEBWT2=$root/ropebwt2
else
ln -s $pre.fq $pre.tmp.mag.gz
fi
# get contigs overlapping HLA exons # get contigs overlapping HLA exons
$root/bwa mem -B1 -O1 -E1 $root/HLA-ALT.fa $pre.tmp.mag.gz | grep -v ^@ | sort -k3,3 -k4,4n | bgzip > $pre.tmp.alt.gz $root/bwa mem -B1 -O1 -E1 $root/HLA-ALT.fa $pre.tmp.mag.gz | grep -v ^@ | sort -k3,3 -k4,4n | bgzip > $pre.tmp.alt.gz
$root/tabix -Bpsam $pre.tmp.alt.gz $root/HLA-approx.anno | cut -f1 | sort | uniq | $root/seqtk subseq $pre.tmp.mag.gz - | gzip -1 > $pre.tmp.fq.gz $root/tabix -Bpsam $pre.tmp.alt.gz $root/HLA-approx.anno | cut -f1 | sort | uniq | $root/seqtk subseq $pre.tmp.mag.gz - | gzip -1 > $pre.tmp.fq.gz
# map HLA exons to de novo contigs # map HLA exons to de novo contigs
$root/bwa index -p $pre.tmp $pre.tmp.fq.gz $root/bwa index -p $pre.tmp $pre.tmp.fq.gz 2> /dev/null
$root/bwa mem -aD.1 -t2 $pre.tmp $root/hla.fa | egrep "^(@|$2)" > $pre.tmp.sam $root/bwa mem -aD.1 -t2 $pre.tmp $root/hla.fa | egrep "^(@|$2)" > $pre.sam
# type # type HLA
$root/k8 $root/bwa-typeHLA.js $pre.tmp.sam > $pre.gt $root/k8 $root/bwa-typeHLA.js $pre.sam > $pre.gt
# delete temporary files # delete temporary files
rm -f $pre.tmp.* rm -f $pre.tmp.*