From c901fb6d7081b5f9610c0d429ab4c2852bf79671 Mon Sep 17 00:00:00 2001 From: kshakir Date: Wed, 19 Jan 2011 18:19:10 +0000 Subject: [PATCH] Now populating the refseq and dbsnp in awk instead of retrieving from firehose. Added refseq table to the pipeline object. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5020 348d0f76-0448-11de-a6fe-93d51630548a --- .../datasources/pipeline/PipelineProject.java | 9 +++++++ shell/getFirehosePipelineYaml.sh | 26 +++++++++++++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java index 8c8e8c7f1..9b51beb53 100644 --- a/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java +++ b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java @@ -36,6 +36,7 @@ public class PipelineProject { private File referenceFile; private File intervalList; private File dbsnpFile; + private File refseqTable; private Map tags = new TreeMap(); public String getName() { @@ -70,6 +71,14 @@ public class PipelineProject { this.dbsnpFile = dbsnpFile; } + public File getRefseqTable() { + return refseqTable; + } + + public void setRefseqTable(File refseqTable) { + this.refseqTable = refseqTable; + } + public Map getTags() { return tags; } diff --git a/shell/getFirehosePipelineYaml.sh b/shell/getFirehosePipelineYaml.sh index 46e45bc09..fde8b52fb 100755 --- a/shell/getFirehosePipelineYaml.sh +++ b/shell/getFirehosePipelineYaml.sh @@ -27,7 +27,7 @@ PIPELINE_YAML_FILE=$ENTITY_SET_ID.yaml # Annotations to pull down from Firehose -FIREHOSE_ANNOTATIONS=(reference_file dbsnp_file interval_list \ +FIREHOSE_ANNOTATIONS=(reference_file interval_list \ sample_id recalibrated_bam_file squid_project collaborator_id) # YAML templates @@ -37,8 +37,9 @@ PROJECT_YAML_TEMPLATE='"\n\ name: '"$ENTITY_SET_ID"',\n\ referenceFile: %s,\n\ dbsnpFile: %s,\n\ + refseqTable: %s,\n\ intervalList: %s\n\ - },", $1, $2, $3' + },", $1, dbsnp, refseq, $2' SAMPLE_YAML_TEMPLATE='"\n\ {\n\ @@ -48,7 +49,7 @@ SAMPLE_YAML_TEMPLATE='"\n\ SQUIDProject: %s,\n\ CollaboratorID: %s\n\ }\n\ - }", $4, $5, $6, $7' + }", $3, $4, $5, $6' TEST_AWK_COUNT=`echo '\n' | awk '{print $0}' | wc -c` if [ "$TEST_AWK_COUNT" -eq 2 ]; then @@ -82,12 +83,27 @@ $FIREHOSE_TEST_HARNESS \ # Generate yaml from firehose output . firehose-populated-commands.sh | awk ' BEGIN { + refseq_dir = "/humgen/gsa-hpprojects/GATK/data/Annotations/refseq/"; + dbsnp_dir = "/humgen/gsa-hpprojects/GATK/data/"; + + dbsnps["Homo_sapiens_assembly18.fasta"] = dbsnp_dir "dbsnp_129_hg18.rod"; + refseqs["Homo_sapiens_assembly18.fasta"] = refseq_dir "refGene-big-table-hg18.txt"; + + dbsnps["Homo_sapiens_assembly19.fasta"] = dbsnp_dir "dbsnp_132_b37.vcf"; + refseqs["Homo_sapiens_assembly19.fasta"] = refseq_dir "refGene-big-table-hg19.txt"; + printf "{" } { if (NR == 1) { - printf '"$PROJECT_YAML_TEMPLATE"' - printf "\n samples: [" + reference_part_count = split($1, reference_parts, "/") + reference_name = reference_parts[reference_part_count]; + + dbsnp = dbsnps[reference_name]; + refseq = refseqs[reference_name]; + + printf '"$PROJECT_YAML_TEMPLATE"' + printf "\n samples: [" } else { printf "," }