From 792be78a8096eb111a090b573d64520a615bc760 Mon Sep 17 00:00:00 2001
From: Heng Li
Date: Sun, 16 Nov 2014 00:46:02 -0500
Subject: [PATCH] replace run-gen-hs38* with run-gen-ref

---
Reviewer note: the extras/run-gen-ref hunk below was revised during review
(quoted expansions, non-zero exit on an unknown build, usage text that lists
the accepted builds, exit status 0 when the BWA index already exists). The
blob id on its "index" line still refers to the originally submitted content.

 extras/run-gen-hs38a  |  9 ---------
 extras/run-gen-hs38d6 |  9 ---------
 extras/run-gen-ref    | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 18 deletions(-)
 delete mode 100755 extras/run-gen-hs38a
 delete mode 100755 extras/run-gen-hs38d6
 create mode 100755 extras/run-gen-ref

diff --git a/extras/run-gen-hs38a b/extras/run-gen-hs38a
deleted file mode 100755
index cd119ad..0000000
--- a/extras/run-gen-hs38a
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-root=`dirname $0`
-
-wget -O- ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_full_analysis_set.fna.gz \
-	| gzip -dc > hs38a.fa
-
-[ ! -f hs38a.fa.alt ] && grep _alt $root/resource-GRCh38/hs38d6.fa.alt > hs38a.fa.alt
-[ ! -f hs38a.fa.bwt ] && echo -e "\nPlease run 'bwa index hs38a.fa'...\n"
diff --git a/extras/run-gen-hs38d6 b/extras/run-gen-hs38d6
deleted file mode 100755
index 86d6fa2..0000000
--- a/extras/run-gen-hs38d6
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-root=`dirname $0`
-
-(wget -O- ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_full_analysis_set.fna.gz \
-	| gzip -dc; cat $root/data/hs38d6-extra.fa) > hs38d6.fa
-
-[ ! -f hs38d6.fa.alt ] && cp $root/resource-GRCh38/hs38d6.fa.alt .
-[ ! -f hs38d6.fa.bwt ] && echo -e "\nPlease run 'bwa index hs38d6.fa'...\n"
diff --git a/extras/run-gen-ref b/extras/run-gen-ref
new file mode 100755
index 0000000..86317c3
--- /dev/null
+++ b/extras/run-gen-ref
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Generate a reference genome FASTA (plus its .alt file for hs38a/hs38d6) in
+# the current directory.
+#
+# Usage: run-gen-ref <hs38|hs38a|hs38d6|hs37|hs37d5>
+#
+# Calls wget, gzip, awk, grep and cp; resource files are read from the
+# resource-GRCh38/ directory located next to this script.
+
+root=$(dirname "$0")
+
+url38="ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_full_analysis_set.fna.gz"
+url37d5="ftp://ftp.ncbi.nlm.nih.gov/1000genomes/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz"
+
+if [ $# -eq 0 ]; then
+	echo "Usage: $0 <hs38|hs38a|hs38d6|hs37|hs37d5>" >&2
+	exit 1
+fi
+
+build=$1
+
+case "$build" in
+hs38d6)
+	# full analysis set followed by the extra sequences in resource-GRCh38/
+	(wget -O- "$url38" | gzip -dc; cat "$root/resource-GRCh38/hs38d6-extra.fa") > "$build.fa"
+	[ -f "$build.fa.alt" ] || cp "$root/resource-GRCh38/hs38d6.fa.alt" "$build.fa.alt"
+	;;
+hs38a)
+	wget -O- "$url38" | gzip -dc > "$build.fa"
+	# keep only the lines of hs38d6.fa.alt that contain "_alt"
+	[ -f "$build.fa.alt" ] || grep _alt "$root/resource-GRCh38/hs38d6.fa.alt" > "$build.fa.alt"
+	;;
+hs38)
+	# we don't use GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz because it lacks EBV;
+	# instead, drop every record whose header line contains "_alt"
+	wget -O- "$url38" | gzip -dc | awk '/^>/{f=/_alt/?0:1}f' > "$build.fa"
+	;;
+hs37d5)
+	# gzip's stderr is discarded for this archive (NOTE(review): presumably to hide a harmless warning - confirm)
+	wget -O- "$url37d5" | gzip -dc > "$build.fa" 2>/dev/null
+	;;
+hs37)
+	# drop every record whose header line contains ">hs37d5"
+	wget -O- "$url37d5" | gzip -dc 2>/dev/null | awk '/^>/{f=/>hs37d5/?0:1}f' > "$build.fa"
+	;;
+*)
+	# stop here so the "bwa index" hint is not printed for a bogus name
+	echo "ERROR: unknown genome build" >&2
+	exit 1
+	;;
+esac
+
+# plain "if" rather than "[ ... ] && echo", so the exit status is 0 when the index exists
+if [ ! -f "$build.fa.bwt" ]; then
+	echo -e "\nPlease run 'bwa index $build.fa'...\n"
+fi