diff --git a/ivy.xml b/ivy.xml index c62acf8c8..a078c0f40 100644 --- a/ivy.xml +++ b/ivy.xml @@ -17,7 +17,7 @@ - + diff --git a/java/src/org/broadinstitute/sting/datasources/pipeline/Pipeline.java b/java/src/org/broadinstitute/sting/datasources/pipeline/Pipeline.java new file mode 100644 index 000000000..f8f8b2d29 --- /dev/null +++ b/java/src/org/broadinstitute/sting/datasources/pipeline/Pipeline.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.datasources.pipeline; + +import java.util.ArrayList; +import java.util.List; + +/** + * Java bean for storing a list of samples for a pipeline. + * + * NOTE: This class is used in a very similar way to the classes in + * org.broadinstitute.sting.gatk.datasources.sample. + * + * Both store / load sample information from the file system as YAML. 
+ * + * This package will likely be refactored to share common functionality + * with the other at a future date as requirements coalesce. + * + * - kshakir September 22, 2010 + */ +public class Pipeline { + private PipelineProject project = new PipelineProject(); + private List samples = new ArrayList(); + + public PipelineProject getProject() { + return project; + } + + public void setProject(PipelineProject project) { + this.project = project; + } + + public List getSamples() { + return samples; + } + + public void setSamples(List samples) { + this.samples = samples; + } +} diff --git a/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java new file mode 100644 index 000000000..8c8e8c7f1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
/**
 * Java bean defining the project for a pipeline: its name, the reference, interval list
 * and dbSNP files, plus free-form string tags.
 *
 * The field declaration order below is significant to code that orders properties by
 * declared field (see FieldOrderComparator), so keep it stable.
 */
public class PipelineProject {
    private String name;
    private File referenceFile;
    private File intervalList;
    private File dbsnpFile;
    // Parameterized (not raw) so tag values are Strings without casts; sorted by key.
    private Map<String, String> tags = new TreeMap<String, String>();

    /**
     * @return the project name, or null if not set.
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the project name.
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the interval list file, or null if not set.
     */
    public File getIntervalList() {
        return intervalList;
    }

    /**
     * @param intervalList the interval list file.
     */
    public void setIntervalList(File intervalList) {
        this.intervalList = intervalList;
    }

    /**
     * @return the reference file, or null if not set.
     */
    public File getReferenceFile() {
        return referenceFile;
    }

    /**
     * @param referenceFile the reference file.
     */
    public void setReferenceFile(File referenceFile) {
        this.referenceFile = referenceFile;
    }

    /**
     * @return the dbSNP file, or null if not set.
     */
    public File getDbsnpFile() {
        return dbsnpFile;
    }

    /**
     * @param dbsnpFile the dbSNP file.
     */
    public void setDbsnpFile(File dbsnpFile) {
        this.dbsnpFile = dbsnpFile;
    }

    /**
     * @return the live (not copied) tag map; empty by default.
     */
    public Map<String, String> getTags() {
        return tags;
    }

    /**
     * @param tags the tag map; stored by reference.
     */
    public void setTags(Map<String, String> tags) {
        this.tags = tags;
    }
}
/**
 * Java bean defining a sample for a pipeline: its id, a map of named bam files
 * (for example "recalibrated" or "cleaned") and free-form string tags.
 *
 * The field declaration order below is significant to code that orders properties by
 * declared field (see FieldOrderComparator), so keep it stable.
 */
public class PipelineSample {
    private String id;
    // Parameterized (not raw) so values come back as File / String without casts; sorted by key.
    private Map<String, File> bamFiles = new TreeMap<String, File>();
    private Map<String, String> tags = new TreeMap<String, String>();

    /**
     * @return the sample id, or null if not set.
     */
    public String getId() {
        return id;
    }

    /**
     * @param id the sample id.
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the live (not copied) map from bam file type to bam file; empty by default.
     */
    public Map<String, File> getBamFiles() {
        return bamFiles;
    }

    /**
     * @param bamFiles the map from bam file type to bam file; stored by reference.
     */
    public void setBamFiles(Map<String, File> bamFiles) {
        this.bamFiles = bamFiles;
    }

    /**
     * @return the live (not copied) tag map; empty by default.
     */
    public Map<String, String> getTags() {
        return tags;
    }

    /**
     * @param tags the tag map; stored by reference.
     */
    public void setTags(Map<String, String> tags) {
        this.tags = tags;
    }
}
org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.yaml.snakeyaml.Loader; import org.yaml.snakeyaml.TypeDescription; import org.yaml.snakeyaml.Yaml; import org.yaml.snakeyaml.constructor.Constructor; @@ -115,8 +114,7 @@ public class SampleDataSource { desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class); desc.putListPropertyType("sampleAliases", SampleAlias.class); con.addTypeDescription(desc); - Loader loader = new Loader(con); - Yaml yaml = new Yaml(loader); + Yaml yaml = new Yaml(con); // SampleFileParser stores an object representation of a sample file - this is what we'll parse SampleFileParser parser; diff --git a/java/src/org/broadinstitute/sting/utils/yaml/FieldOrderComparator.java b/java/src/org/broadinstitute/sting/utils/yaml/FieldOrderComparator.java new file mode 100644 index 000000000..2a043466a --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/yaml/FieldOrderComparator.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.yaml; + +import org.yaml.snakeyaml.introspector.Property; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +/** + * Orders properties based on the order of the fields in the Java Bean. + */ +class FieldOrderComparator implements Comparator { + private final List propertyOrder; + + public FieldOrderComparator(Class clazz) { + propertyOrder = new ArrayList(); + for (Field field : clazz.getDeclaredFields()) + propertyOrder.add(field.getName()); + } + + @Override + public int compare(Property one, Property two) { + Integer index1 = propertyOrder.indexOf(one.getName()); + Integer index2 = propertyOrder.indexOf(two.getName()); + return index1.compareTo(index2); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/yaml/StingYamlRepresenter.java b/java/src/org/broadinstitute/sting/utils/yaml/StingYamlRepresenter.java new file mode 100644 index 000000000..157b1ce27 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/yaml/StingYamlRepresenter.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or 
substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.yaml; + +import org.yaml.snakeyaml.introspector.Property; +import org.yaml.snakeyaml.nodes.*; +import org.yaml.snakeyaml.representer.Represent; +import org.yaml.snakeyaml.representer.Representer; + +import java.beans.IntrospectionException; +import java.io.File; +import java.util.Set; +import java.util.TreeSet; + +/** + * A representer with Sting preferred settings. + * - Fields are ordered in the order of the class declaration, instead of alphabetically. + * - Null values, empty maps and empty sequences are not output. + * - Files are written as plain strings using File.getPath(), i.e. the path as given (not made absolute). 
+ */ +public class StingYamlRepresenter extends Representer { + + public StingYamlRepresenter() { + super(); + this.representers.put(File.class, new RepresentFile()); + } + + @Override + protected Set getProperties(Class type) throws IntrospectionException { + TreeSet properties = new TreeSet(new FieldOrderComparator(type)); + properties.addAll(super.getProperties(type)); + return properties; + } + + @Override + protected NodeTuple representJavaBeanProperty(Object javaBean, Property property, + Object propertyValue, Tag customTag) { + NodeTuple tuple = super.representJavaBeanProperty(javaBean, property, propertyValue, customTag); + Node valueNode = tuple.getValueNode(); + if (Tag.NULL.equals(valueNode.getTag())) { + return null;// skip 'null' values + } + if (valueNode instanceof CollectionNode) { + if (Tag.SEQ.equals(valueNode.getTag())) { + SequenceNode seq = (SequenceNode) valueNode; + if (seq.getValue().isEmpty()) { + return null;// skip empty lists + } + } + if (Tag.MAP.equals(valueNode.getTag())) { + MappingNode seq = (MappingNode) valueNode; + if (seq.getValue().isEmpty()) { + return null;// skip empty maps + } + } + } + return tuple; + } + + private class RepresentFile implements Represent { + @Override + public Node representData(Object o) { + return StingYamlRepresenter.this.representScalar(Tag.STR, ((File)o).getPath()); + } + } +} diff --git a/java/src/org/broadinstitute/sting/utils/yaml/YamlUtils.java b/java/src/org/broadinstitute/sting/utils/yaml/YamlUtils.java new file mode 100644 index 000000000..715c71efc --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/yaml/YamlUtils.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, 
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.yaml; + +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.yaml.snakeyaml.DumperOptions; +import org.yaml.snakeyaml.Yaml; +import org.yaml.snakeyaml.constructor.Constructor; +import org.yaml.snakeyaml.nodes.Tag; +import org.yaml.snakeyaml.representer.Representer; + +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; + +/** + * A collection of utilities for operating on YAML. + * Uses the FLOW style of writing YAML, versus the BLOCK style. + * By default uses a representer that prunes empty lists and maps. + */ +public class YamlUtils { + private static Representer representer = new StingYamlRepresenter(); + private static DumperOptions options = new DumperOptions(); + + static { + options.setCanonical(false); + options.setExplicitRoot(Tag.MAP); + options.setDefaultFlowStyle(DumperOptions.FlowStyle.FLOW); + options.setPrettyFlow(true); + } + + /** + * Serialize an object to the file system. + * @param o Object to serialize. + * @param file Path to write the serialized YAML. 
+ */ + public static void dump(Object o, File file) { + dump(o, file, representer); + } + + /** + * Serialize an object to the file system. + * @param o Object to serialize. + * @param file Path to write the serialized YAML. + * @param representer Custom representer with rules on how to serialize YAML. + */ + public static void dump(Object o, File file, Representer representer) { + Constructor constructor = new Constructor(o.getClass()); + Yaml yaml = new Yaml(constructor, representer, options); + try { + yaml.dump(o, new FileWriter(file)); + } catch (IOException ioe) { + throw new UserException.CouldNotCreateOutputFile(file, ioe); + } + } + + /** + * Deserialize an object from the file system. + * @param clazz Clazz to deserialize. + * @param file Path to read the deserialized YAML. + * @return Object deserialized from the file system. + */ + public static T load(Class clazz, File file) { + return load(clazz, file, representer); + } + + /** + * Deserialize an object from the file system. + * @param clazz Clazz to deserialize. + * @param file Path to read the deserialized YAML. + * @param representer Custom representer with rules on how to deserialize YAML. + * @return Object deserialized from the file system. 
+ */ + @SuppressWarnings("unchecked") + public static T load(Class clazz, File file, Representer representer) { + Constructor constructor = new Constructor(clazz); + Yaml yaml = new Yaml(constructor, representer, options); + try { + return (T) yaml.load(new FileReader(file)); + } catch (IOException ioe) { + throw new UserException.CouldNotReadInputFile(file, ioe); + } + } +} diff --git a/java/test/org/broadinstitute/sting/datasources/pipeline/PipelineUnitTest.java b/java/test/org/broadinstitute/sting/datasources/pipeline/PipelineUnitTest.java new file mode 100644 index 000000000..7a32bcea5 --- /dev/null +++ b/java/test/org/broadinstitute/sting/datasources/pipeline/PipelineUnitTest.java @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.datasources.pipeline; + +import org.broadinstitute.sting.utils.yaml.YamlUtils; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.util.Map; + +public class PipelineUnitTest { + @Test + public void testDumpAndLoad() throws Exception { + Pipeline pipeline = new Pipeline(); + + pipeline.getProject().setName("PRJ_NAME"); + pipeline.getProject().setReferenceFile(new File("my.fasta")); + pipeline.getProject().setDbsnpFile(new File("my.dbsnp")); + pipeline.getProject().getTags().put("testProjectTag", "project value here"); + + PipelineSample sample = new PipelineSample(); + sample.setId("SMP_ID"); + sample.getBamFiles().put("recalibrated", new File("recalibrated.bam")); + sample.getBamFiles().put("cleaned", new File("/absolute/path/to/cleaned.bam")); + sample.getTags().put("testSampleTag", "sample value here"); + + pipeline.getSamples().add(sample); + + File file = File.createTempFile("testDumpAndLoad", ".yaml"); + YamlUtils.dump(pipeline, file); + Pipeline pipelineLoad = YamlUtils.load(Pipeline.class, file); + + Assert.assertEquals(pipeline.getProject().getName(), pipelineLoad.getProject().getName()); + Assert.assertEquals(pipeline.getProject().getReferenceFile(), pipelineLoad.getProject().getReferenceFile()); + Assert.assertEquals(pipeline.getProject().getIntervalList(), pipelineLoad.getProject().getIntervalList()); + Assert.assertEquals(pipeline.getProject().getDbsnpFile(), pipelineLoad.getProject().getDbsnpFile()); + + Assert.assertEquals(pipeline.getProject().getTags().size(), pipelineLoad.getProject().getTags().size()); + for (Map.Entry entry : pipeline.getProject().getTags().entrySet()) + Assert.assertEquals(entry.getValue(), pipeline.getProject().getTags().get(entry.getKey())); + + Assert.assertEquals(pipeline.getSamples().size(), pipelineLoad.getSamples().size()); + for (int i = 0; i < pipeline.getSamples().size(); i++) { + PipelineSample pipelineSample = pipeline.getSamples().get(i); + 
PipelineSample pipelineLoadSample = pipelineLoad.getSamples().get(i); + + Assert.assertEquals(pipelineSample.getId(), pipelineLoadSample.getId()); + + Assert.assertEquals(pipelineSample.getBamFiles().size(), pipelineLoadSample.getBamFiles().size()); + for (Map.Entry entry : pipelineSample.getBamFiles().entrySet()) + Assert.assertEquals(entry.getValue(), pipelineSample.getBamFiles().get(entry.getKey())); + + Assert.assertEquals(pipelineSample.getTags().size(), pipelineLoadSample.getTags().size()); + for (Map.Entry entry : pipelineSample.getTags().entrySet()) + Assert.assertEquals(entry.getValue(), pipelineSample.getTags().get(entry.getKey())); + } + } +} diff --git a/scala/qscript/fullCallingPipeline.q b/scala/qscript/fullCallingPipeline.q index 402e9dc25..05fccf16c 100755 --- a/scala/qscript/fullCallingPipeline.q +++ b/scala/qscript/fullCallingPipeline.q @@ -1,24 +1,24 @@ +import org.broadinstitute.sting.datasources.pipeline.Pipeline import org.broadinstitute.sting.gatk.DownsampleType import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction import org.broadinstitute.sting.queue.extensions.samtools._ import org.broadinstitute.sting.queue.{QException, QScript} +import collection.JavaConversions._ +import org.broadinstitute.sting.utils.yaml.YamlUtils class fullCallingPipeline extends QScript { qscript => - @Argument(doc = "reference", shortName="R") - var reference: File = _ - @Argument(doc="contigIntervals", shortName="contigIntervals") var contigIntervals: File = _ @Argument(doc="numContigs", shortName="numContigs") var numContigs: Int = _ - @Argument(doc="project", shortName="project") - var project: String = _ + @Argument(fullName="pipeline_yaml", shortName="PY", doc="Pipeline YAML file") + var pipelineYamlFile: File = _ @Input(doc="trigger", shortName="trigger", required=false) var trigger: File = _ @@ 
-35,12 +35,6 @@ class fullCallingPipeline extends QScript { @Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/") var picardFixMatesJar: File = _ - @Input(doc="intervals") - var intervals: File = _ - - @Input(doc="bam files", shortName="I") - var bamFiles: List[File] = Nil - @Input(doc="gatk jar") var gatkJar: File = _ @@ -50,9 +44,6 @@ class fullCallingPipeline extends QScript { @Input(doc="SNP cluster filter -- window size",shortName="snpClusterWindow",required=false) var snpClusterWindow = 7 - @Input(doc="dbSNP version",shortName="D") - var dbSNP: File = _ - @Input(doc="target titv for recalibration",shortName="titv",required=false) var target_titv = 2.1 @@ -65,11 +56,12 @@ class fullCallingPipeline extends QScript { @Input(doc="Number of jobs to scatter indel genotyper",shortName="indelScatter",required=false) var num_indel_scatter_jobs = 5 + private var pipeline: Pipeline = _ trait CommandLineGATKArgs extends CommandLineGATK { - this.intervals = qscript.intervals + this.intervals = qscript.pipeline.getProject.getIntervalList this.jarFile = qscript.gatkJar - this.reference_sequence = qscript.reference + this.reference_sequence = qscript.pipeline.getProject.getReferenceFile } @@ -77,20 +69,26 @@ class fullCallingPipeline extends QScript { def script = { - val projectBase: String = qscript.project + pipeline = YamlUtils.load(classOf[Pipeline], qscript.pipelineYamlFile) + val projectBase: String = qscript.pipeline.getProject.getName val cleanedBase: String = projectBase + ".cleaned" val uncleanedBase: String = projectBase + ".uncleaned" // there are commands that use all the bam files - var cleanBamFiles = List.empty[File] + val recalibratedSamples = qscript.pipeline.getSamples + .filter(_.getBamFiles.contains("recalibrated")) - for ( bam <- qscript.bamFiles ) { + for ( sample <- recalibratedSamples ) { // put unclean bams in 
unclean genotypers // in advance, create the extension files + val bam = sample.getBamFiles.get("recalibrated") + if (!sample.getBamFiles.contains("cleaned")) + sample.getBamFiles.put("cleaned", swapExt(bam,"bam","cleaned.bam")) + val cleaned_bam = sample.getBamFiles.get("cleaned") + val indel_targets = swapExt(bam,"bam","realigner_targets.interval_list") - val cleaned_bam = swapExt(bam,"bam","cleaned.bam") // note-- the scatter is in the definition itself // create the cleaning commands @@ -131,8 +129,6 @@ class fullCallingPipeline extends QScript { gather.jarFile = qscript.picardFixMatesJar // Don't pass this AS=true to fix mates! gather.assumeSorted = None - case (gather: SimpleTextGatherFunction, _) => - throw new QException("Cannot text-gather a realignment job") } } else { realigner.out = swapExt(bam,"bam","unfixed.cleaned.bam") @@ -149,10 +145,6 @@ class fullCallingPipeline extends QScript { var samtoolsindex = new SamtoolsIndexFunction samtoolsindex.bamFile = cleaned_bam - // put clean bams in clean genotypers - - cleanBamFiles :+= cleaned_bam - // COMMENT THIS NEXT BLOCK TO SKIP CLEANING if ( realigner.scatterCount > 1 ) add(targetCreator,realigner,samtoolsindex) @@ -160,8 +152,17 @@ class fullCallingPipeline extends QScript { add(targetCreator,realigner,fixMates,samtoolsindex) } + val recalibratedBamFiles = recalibratedSamples + .map(_.getBamFiles.get("recalibrated")) + .toList + + val cleanBamFiles = qscript.pipeline.getSamples + .filter(_.getBamFiles.contains("cleaned")) + .map(_.getBamFiles.get("cleaned")) + .toList + // actually make calls - endToEnd(uncleanedBase,qscript.bamFiles) + endToEnd(uncleanedBase,recalibratedBamFiles) // COMMENT THIS NEXT LINE TO AVOID CALLING ON CLEANED FILES endToEnd(cleanedBase,cleanBamFiles) } @@ -218,7 +219,7 @@ class fullCallingPipeline extends QScript { loopNo += 1 } val mergeIndels = new CombineVariants with CommandLineGATKArgs - mergeIndels.out = new TaggedFile(qscript.project+".indels.vcf","vcf") + mergeIndels.out 
= new TaggedFile(qscript.pipeline.getProject.getName+".indels.vcf","vcf") mergeIndels.genotypemergeoption = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.GenotypeMergeType.UNIQUIFY) mergeIndels.priority = priority mergeIndels.variantmergeoption = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION) @@ -259,7 +260,7 @@ class fullCallingPipeline extends QScript { // todo -- args for resources (properties file) val clusters = new GenerateVariantClusters with CommandLineGATKArgs clusters.rodBind :+= RodBind("input", "VCF", masker.out) - clusters.DBSNP = qscript.dbSNP + clusters.DBSNP = qscript.pipeline.getProject.getDbsnpFile val clusters_clusterFile = swapExt(new File(snps.out.getAbsolutePath),".vcf",".cluster") clusters.clusterFile = clusters_clusterFile clusters.memoryLimit = Some(4) @@ -271,7 +272,7 @@ class fullCallingPipeline extends QScript { // 3.ii apply gaussian clusters to the masked vcf val recalibrate = new VariantRecalibrator with CommandLineGATKArgs recalibrate.clusterFile = clusters.clusterFile - recalibrate.DBSNP = qscript.dbSNP + recalibrate.DBSNP = qscript.pipeline.getProject.getDbsnpFile recalibrate.rodBind :+= RodBind("input", "VCF", masker.out) recalibrate.out = swapExt(masker.out,".vcf",".recalibrated.vcf") recalibrate.target_titv = qscript.target_titv diff --git a/shell/getFirehosePipelineYaml.sh b/shell/getFirehosePipelineYaml.sh new file mode 100644 index 000000000..6966c2785 --- /dev/null +++ b/shell/getFirehosePipelineYaml.sh @@ -0,0 +1,92 @@ +#!/bin/sh + +# Downloads a set of samples from Firehose using the Firehose Test Harness and awk to generate a YAML file. 

# NOTE: this script is declared as #!/bin/sh, so only POSIX shell features are used below
# (no arrays, no '==' inside [ ], no 'let').

ENTITY_SET_ID=$1
ENTITY_SET_TYPE=Sample_Set
ENTITY_TYPE=Sample

if [ -z "$ENTITY_SET_ID" ]; then
    echo "Usage: $0 <entity_set_id>" >&2
    exit 1
fi

# Firehose variables

FIREHOSE_SOURCE_HOME=/humgen/gsa-firehose/firehose/source
CGA_HOME=$FIREHOSE_SOURCE_HOME/CancerGenomeAnalysis
FIREHOSE_TEST_HARNESS="python $CGA_HOME/analysis_pipeline/scripts/firehose_test_harness.py"
FIREHOSE_HOST=firehose
FIREHOSE_PORT=8080
FIREHOSE_DOMAIN=gsa
FIREHOSE_WORKSPACE=trunk

# YAML file to write

PIPELINE_YAML_FILE=$ENTITY_SET_ID.yaml

# Annotations to pull down from Firehose, as a whitespace separated list.
# The order matters: it fixes the awk field numbers ($1..$7) used by the templates below.

FIREHOSE_ANNOTATIONS="reference_file dbsnp_file interval_list \
    sample_id recalibrated_bam_file squid_project collaborator_id"

# YAML templates: each one is a printf format string followed by its awk arguments.

PROJECT_YAML_TEMPLATE='" \
  project: { \
    name: '"$ENTITY_SET_ID"', \
    referenceFile: %s, \
    dbsnpFile: %s, \
    intervalList: %s \
  },", $1, $2, $3'

SAMPLE_YAML_TEMPLATE='" \
    { \
      id: %s, \
      bamFiles: { recalibrated: %s }, \
      tags: { \
        SQUIDProject: %s, \
        CollaboratorID: %s \
      } \
    }", $4, $5, $6, $7'

FIREHOSE_VARIABLES=""
TAB=$(printf '\t')

# Build the tab separated list of firehose arguments, each one a literal ${annotation_name}.

for annotation in $FIREHOSE_ANNOTATIONS; do
    if [ -n "$FIREHOSE_VARIABLES" ]; then
        FIREHOSE_VARIABLES="$FIREHOSE_VARIABLES$TAB"
    fi
    FIREHOSE_VARIABLES="$FIREHOSE_VARIABLES"'${'"$annotation"'}'
done

# Ask the Firehose test harness to populate an echo command with the annotation values,
# then (only if the harness succeeded) source the populated commands and turn their
# output into YAML with awk: the first record also emits the project section, and every
# record emits one sample entry.
# NOTE(review): '.' with a slash-less file name is resolved through PATH by POSIX shells;
# presumably firehose-populated-commands.sh is written somewhere that lookup finds it - confirm.
$FIREHOSE_TEST_HARNESS \
    -d "$FIREHOSE_DOMAIN" -w "$FIREHOSE_WORKSPACE" \
    -t "$ENTITY_TYPE" -f "$ENTITY_SET_ID" -y "$ENTITY_SET_TYPE" \
    "echo '$FIREHOSE_VARIABLES'" &&
. firehose-populated-commands.sh | awk '
BEGIN {
    printf "{"
}
{
    if (NR == 1) {
        printf '"$PROJECT_YAML_TEMPLATE"'
        printf "\n  samples: ["
    } else {
        printf ","
    }
    printf '"$SAMPLE_YAML_TEMPLATE"'
}
END {
    if (NR > 0)
        printf "\n  ]"
    print "\n}"
}' > "$PIPELINE_YAML_FILE"