updated decoy sequences and names

This commit is contained in:
Heng Li 2014-12-20 22:24:13 -05:00
parent c561759222
commit 32c4e3fe5a
2 changed files with 9 additions and 11 deletions

View File

@ -17,7 +17,7 @@ other programs or use data in `bwa.kit`. The following shows an example about
how to use bwakit:
```sh
# Download bwakit (or from <http://sourceforge.net/projects/bio-bwa/files/bwakit/> manually)
# Download the bwa-0.7.11 binary package (download link may change)
wget -O- http://sourceforge.net/projects/bio-bwa/files/bwakit/bwakit-0.7.11_x64-linux.tar.bz2/download \
| bzip2 -dc | tar xf -
# Generate the GRCh38+ALT+decoy+HLA and create the BWA index
@ -50,7 +50,7 @@ bwa.kit
|-- README.md This README file.
|-- run-bwamem *Entry script* for the entire mapping pipeline.
|-- bwa *BWA binary*
|-- k8 Interpreter for *.js scripts.
|-- k8 Interpreter for *.js scripts.
|-- bwa-postalt.js Post-process alignments to ALT contigs/decoys/HLA genes.
|-- htsbox Used by run-bwamem for shuffling BAMs and BAM=>FASTQ.
|-- samblaster MarkDuplicates for reads from the same library. v0.1.20
@ -60,10 +60,8 @@ bwa.kit
|
|-- run-gen-ref *Entry script* for generating human reference genomes.
|-- resource-GRCh38 Resources for generating GRCh38
| |-- hs38d6-decoy.nt.anno Top decoy-to-nt hits. Not used by any scripts.
| |-- hs38d6-decoy.rm.out RepeatMasker report. Not used.
| |-- hs38d6-extra.fa Decoy and HLA gene sequences. Used by run-gen-ref.
| `-- hs38d6.fa.alt ALT-to-GRCh38 alignment. Used by run-gen-ref.
| |-- hs38D1-extra.fa Decoy and HLA gene sequences. Used by run-gen-ref.
| `-- hs38D1.fa.alt ALT-to-GRCh38 alignment. Used by run-gen-ref.
|
|-- run-HLA HLA typing for sequences extracted by bwa-postalt.js.
|-- typeHLA.sh Type one HLA-gene. Called by run-HLA.

View File

@ -6,16 +6,16 @@ url38="ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals
url37d5="ftp://ftp.ncbi.nlm.nih.gov/1000genomes/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz"
if [ $# -eq 0 ]; then
echo "Usage: $0 <hs38|hs38a|hs38d6|hs37|hs37d5>"
echo "Usage: $0 <hs38|hs38a|hs38D1|hs37|hs37d5>"
exit 1;
fi
if [ $1 == "hs38d6" ]; then
(wget -O- $url38 | gzip -dc; cat $root/resource-GRCh38/hs38d6-extra.fa) > $1.fa
[ ! -f $1.fa.alt ] && cp $root/resource-GRCh38/hs38d6.fa.alt $1.fa.alt
if [ $1 == "hs38D1" ]; then
(wget -O- $url38 | gzip -dc; cat $root/resource-GRCh38/hs38D1-extra.fa) > $1.fa
[ ! -f $1.fa.alt ] && cp $root/resource-GRCh38/hs38D1.fa.alt $1.fa.alt
elif [ $1 == "hs38a" ]; then
wget -O- $url38 | gzip -dc > $1.fa
[ ! -f $1.fa.alt ] && grep _alt $root/resource-GRCh38/hs38d6.fa.alt > $1.fa.alt
[ ! -f $1.fa.alt ] && grep _alt $root/resource-GRCh38/hs38D1.fa.alt > $1.fa.alt
elif [ $1 == "hs38" ]; then
wget -O- $url38 | gzip -dc | awk '/^>/{f=/_alt/?0:1}f' > $1.fa
elif [ $1 == "hs37d5" ]; then