From ace18171b0a8f47791c48bbdfb4c49772fe8760f Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 4 Nov 2014 10:07:14 -0500 Subject: [PATCH] backup --- bwa-typeHLA.js | 21 +++++++++++++++++---- bwa-typeHLA.sh | 27 +++++++++++++++++++-------- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/bwa-typeHLA.js b/bwa-typeHLA.js index ad47387..7a59d60 100644 --- a/bwa-typeHLA.js +++ b/bwa-typeHLA.js @@ -47,7 +47,7 @@ var getopt = function(args, ostr) { *** Main function *** *********************/ -var c, fuzzy = 1, min_qal = 100, drop_thres = 7; +var c, fuzzy = 1, min_qal = 100, drop_thres = 5; // parse command line options while ((c = getopt(arguments, "f:m:d:")) != null) { @@ -212,8 +212,11 @@ for (var e = 0; e < exons.length; ++e) { var gj = ga[j], g2 = sc[gj], m = 0; for (var k = 0; k < ca.length; ++k) { var c = ca[k]; - if (!dropped[c]) + if (!dropped[c]) { m += g1[c] < g2[c]? g1[c] : g2[c]; + if ((gi == 518 && gj == 653) || (gi == 653 && gj == 518)) + print(e+1, clist[c], g1[c], g2[c]); + } } var x = m<<20 | 1<<6 | pri_exon[e]; if (gi < gj) pair[gj][gi] += x; @@ -223,6 +226,16 @@ for (var e = 0; e < exons.length; ++e) { } } +// extract the 3rd and 4th digits +var gsub = [], gsuf = []; +for (var i = 0; i < glist.length; ++i) { + var m = /^HLA-[^*\s]+\*\d+:(\d+).*([A-Z]?)$/.exec(glist[i]); + gsub[i] = parseInt(m[1]); + gsuf[i] = /[A-Z]$/.test(glist[i])? 1 : 0; +} + +warn(ghash["HLA-DQB1*06:04:02"], ghash["HLA-DQB1*06:44"], pair[518][518].toString(16), pair[653][518].toString(16)); + // genotyping var min_nm = 0x7fffffff; for (var i = 0; i < glist.length; ++i) @@ -234,8 +247,8 @@ var out = []; for (var i = 0; i < glist.length; ++i) for (var j = 0; j <= i; ++j) if ((pair[i][j]&63) == n_pri_exons && pair[i][j]>>20 <= min_nm + fuzzy) - out.push([pair[i][j]>>20, pair[i][j]>>6&63, i, j]); + out.push([pair[i][j]>>20, pair[i][j]>>6&63, i, j, (gsuf[i] + gsuf[j])<<16|(gsub[i] + gsub[j])]); -out.sort(function(a, b) { return a[0]!=b[0]? a[0]-b[0] : b[1]-a[1]}); +out.sort(function(a, b) { return a[0]!=b[0]? a[0]-b[0] : a[1]!=b[1]? b[1]-a[1] : a[4]!=b[4]? a[4]-b[4] : a[2]!=b[2]? a[2]-b[2] : a[3]-b[3]}); for (var i = 0; i < out.length; ++i) print(glist[out[i][2]], glist[out[i][3]], out[i][0], out[i][1]); diff --git a/bwa-typeHLA.sh b/bwa-typeHLA.sh index 2e5c123..e112209 100755 --- a/bwa-typeHLA.sh +++ b/bwa-typeHLA.sh @@ -1,28 +1,39 @@ #!/bin/bash +is_ctg=0 + +if [ $# -gt 1 ] && [ $1 == '-A' ]; then + is_ctg=1 + shift +fi + if [ $# -lt 2 ]; then - echo "Usage: $0 " + echo "Usage: $0 [-A] " exit 1 fi root=`dirname $0` pre=$1.$2 -len=`$root/seqtk comp $pre.fq | awk '{++x;y+=$2}END{printf("%.0f\n", y/x)}'` # de novo assembly -$root/fermi2.pl unitig -t2 -l$len -p $pre.tmp $pre.fq > $pre.tmp.mak -make -f $pre.tmp.mak +if [ $is_ctg -eq 0 ]; then + len=`$root/seqtk comp $pre.fq | awk '{++x;y+=$2}END{printf("%.0f\n", y/x)}'` + $root/fermi2.pl unitig -t2 -l$len -p $pre.tmp $pre.fq > $pre.tmp.mak + make -f $pre.tmp.mak EXE_FERMI2=$root/fermi2 EXE_ROPEBWT2=$root/ropebwt2 +else + ln -s $pre.fq $pre.tmp.mag.gz +fi # get contigs overlapping HLA exons $root/bwa mem -B1 -O1 -E1 $root/HLA-ALT.fa $pre.tmp.mag.gz | grep -v ^@ | sort -k3,3 -k4,4n | bgzip > $pre.tmp.alt.gz $root/tabix -Bpsam $pre.tmp.alt.gz $root/HLA-approx.anno | cut -f1 | sort | uniq | $root/seqtk subseq $pre.tmp.mag.gz - | gzip -1 > $pre.tmp.fq.gz # map HLA exons to de novo contigs -$root/bwa index -p $pre.tmp $pre.tmp.fq.gz -$root/bwa mem -aD.1 -t2 $pre.tmp $root/hla.fa | egrep "^(@|$2)" > $pre.tmp.sam +$root/bwa index -p $pre.tmp $pre.tmp.fq.gz 2> /dev/null +$root/bwa mem -aD.1 -t2 $pre.tmp $root/hla.fa | egrep "^(@|$2)" > $pre.sam -# type -$root/k8 $root/bwa-typeHLA.js $pre.tmp.sam > $pre.gt +# type HLA +$root/k8 $root/bwa-typeHLA.js $pre.sam > $pre.gt # delete temporary files rm -f $pre.tmp.*