diff --git a/ivy.xml b/ivy.xml
index c62acf8c8..a078c0f40 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -17,7 +17,7 @@
-
+
diff --git a/java/src/org/broadinstitute/sting/datasources/pipeline/Pipeline.java b/java/src/org/broadinstitute/sting/datasources/pipeline/Pipeline.java
new file mode 100644
index 000000000..f8f8b2d29
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/datasources/pipeline/Pipeline.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.datasources.pipeline;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Java bean for storing a list of samples for a pipeline.
+ *
+ * NOTE: This class is used in a very similar way to the classes in
+ * org.broadinstitute.sting.gatk.datasources.sample.
+ *
+ * Both store / load sample information from the file system as YAML.
+ *
+ * This package will likely be refactored to share common functionality
+ * with the other at a future date as requirements coalesce.
+ *
+ * - kshakir September 22, 2010
+ */
+public class Pipeline {
+    // Both fields start out non-null so callers can populate a fresh bean directly.
+    private PipelineProject project = new PipelineProject();
+    private List<PipelineSample> samples = new ArrayList<PipelineSample>();
+
+    /**
+     * @return The project settings for this pipeline; initially an empty project.
+     */
+    public PipelineProject getProject() {
+        return project;
+    }
+
+    /**
+     * @param project The project settings to use for this pipeline.
+     */
+    public void setProject(PipelineProject project) {
+        this.project = project;
+    }
+
+    /**
+     * @return The live (not copied) list of samples; initially an empty list.
+     */
+    public List<PipelineSample> getSamples() {
+        return samples;
+    }
+
+    /**
+     * @param samples The list of samples to use; stored by reference.
+     */
+    public void setSamples(List<PipelineSample> samples) {
+        this.samples = samples;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java
new file mode 100644
index 000000000..8c8e8c7f1
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineProject.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.datasources.pipeline;
+
+import java.io.File;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Java bean defining the project for a pipeline.
+ *
+ * The declaration order of the fields is significant: FieldOrderComparator
+ * uses it to order the properties when the bean is written as YAML.
+ */
+public class PipelineProject {
+    private String name;
+    private File referenceFile;
+    private File intervalList;
+    private File dbsnpFile;
+    // Sorted by key so that the tags are always listed in a stable order.
+    private Map<String, String> tags = new TreeMap<String, String>();
+
+    /**
+     * @return The name of the project, or null if not set.
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * @param name The name of the project.
+     */
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    /**
+     * @return The interval list file, or null if not set.
+     */
+    public File getIntervalList() {
+        return intervalList;
+    }
+
+    /**
+     * @param intervalList The interval list file.
+     */
+    public void setIntervalList(File intervalList) {
+        this.intervalList = intervalList;
+    }
+
+    /**
+     * @return The reference file, or null if not set.
+     */
+    public File getReferenceFile() {
+        return referenceFile;
+    }
+
+    /**
+     * @param referenceFile The reference file.
+     */
+    public void setReferenceFile(File referenceFile) {
+        this.referenceFile = referenceFile;
+    }
+
+    /**
+     * @return The dbsnp file, or null if not set.
+     */
+    public File getDbsnpFile() {
+        return dbsnpFile;
+    }
+
+    /**
+     * @param dbsnpFile The dbsnp file.
+     */
+    public void setDbsnpFile(File dbsnpFile) {
+        this.dbsnpFile = dbsnpFile;
+    }
+
+    /**
+     * @return The live (not copied) map of tag name to tag value; initially empty.
+     */
+    public Map<String, String> getTags() {
+        return tags;
+    }
+
+    /**
+     * @param tags The map of tag name to tag value; stored by reference.
+     */
+    public void setTags(Map<String, String> tags) {
+        this.tags = tags;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineSample.java b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineSample.java
new file mode 100644
index 000000000..701841302
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/datasources/pipeline/PipelineSample.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.datasources.pipeline;
+
+import java.io.File;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Java bean defining a sample for a pipeline.
+ *
+ * The declaration order of the fields is significant: FieldOrderComparator
+ * uses it to order the properties when the bean is written as YAML.
+ */
+public class PipelineSample {
+    private String id;
+    // Both maps are sorted by key so that entries are always listed in a stable order.
+    private Map<String, File> bamFiles = new TreeMap<String, File>();
+    private Map<String, String> tags = new TreeMap<String, String>();
+
+    /**
+     * @return The id of the sample, or null if not set.
+     */
+    public String getId() {
+        return id;
+    }
+
+    /**
+     * @param id The id of the sample.
+     */
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    /**
+     * @return The live (not copied) map of bam type (for example "recalibrated"
+     *         or "cleaned") to bam file; initially empty.
+     */
+    public Map<String, File> getBamFiles() {
+        return bamFiles;
+    }
+
+    /**
+     * @param bamFiles The map of bam type to bam file; stored by reference.
+     */
+    public void setBamFiles(Map<String, File> bamFiles) {
+        this.bamFiles = bamFiles;
+    }
+
+    /**
+     * @return The live (not copied) map of tag name to tag value; initially empty.
+     */
+    public Map<String, String> getTags() {
+        return tags;
+    }
+
+    /**
+     * @param tags The map of tag name to tag value; stored by reference.
+     */
+    public void setTags(Map<String, String> tags) {
+        this.tags = tags;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java
index eedc15e7f..55f4e2fd5 100644
--- a/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java
@@ -6,7 +6,6 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
-import org.yaml.snakeyaml.Loader;
import org.yaml.snakeyaml.TypeDescription;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
@@ -115,8 +114,7 @@ public class SampleDataSource {
desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
desc.putListPropertyType("sampleAliases", SampleAlias.class);
con.addTypeDescription(desc);
- Loader loader = new Loader(con);
- Yaml yaml = new Yaml(loader);
+ Yaml yaml = new Yaml(con);
// SampleFileParser stores an object representation of a sample file - this is what we'll parse
SampleFileParser parser;
diff --git a/java/src/org/broadinstitute/sting/utils/yaml/FieldOrderComparator.java b/java/src/org/broadinstitute/sting/utils/yaml/FieldOrderComparator.java
new file mode 100644
index 000000000..2a043466a
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/yaml/FieldOrderComparator.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.yaml;
+
+import org.yaml.snakeyaml.introspector.Property;
+
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Orders properties based on the order of the fields in the Java Bean.
+ */
+class FieldOrderComparator implements Comparator {
+ private final List propertyOrder;
+
+ public FieldOrderComparator(Class> clazz) {
+ propertyOrder = new ArrayList();
+ for (Field field : clazz.getDeclaredFields())
+ propertyOrder.add(field.getName());
+ }
+
+ @Override
+ public int compare(Property one, Property two) {
+ Integer index1 = propertyOrder.indexOf(one.getName());
+ Integer index2 = propertyOrder.indexOf(two.getName());
+ return index1.compareTo(index2);
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/yaml/StingYamlRepresenter.java b/java/src/org/broadinstitute/sting/utils/yaml/StingYamlRepresenter.java
new file mode 100644
index 000000000..157b1ce27
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/yaml/StingYamlRepresenter.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.yaml;
+
+import org.yaml.snakeyaml.introspector.Property;
+import org.yaml.snakeyaml.nodes.*;
+import org.yaml.snakeyaml.representer.Represent;
+import org.yaml.snakeyaml.representer.Representer;
+
+import java.beans.IntrospectionException;
+import java.io.File;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * A representer with Sting preferred settings.
+ * - Fields are ordered in the order of the class declaration, instead of alphabetically.
+ * - Null values, empty maps and empty sequences are not output.
+ * - Files are written as plain strings holding File.getPath(), so relative paths stay relative.
+ */
+public class StingYamlRepresenter extends Representer {
+
+    public StingYamlRepresenter() {
+        super();
+        // Register the custom scalar representation for java.io.File values.
+        this.representers.put(File.class, new RepresentFile());
+    }
+
+    /**
+     * Returns the properties of the bean type sorted with a FieldOrderComparator.
+     * @param type Bean class being represented.
+     * @return The properties reported by the superclass, re-sorted.
+     * @throws IntrospectionException If the superclass fails to introspect the type.
+     */
+    @Override
+    protected Set<Property> getProperties(Class<?> type) throws IntrospectionException {
+        Set<Property> properties = new TreeSet<Property>(new FieldOrderComparator(type));
+        properties.addAll(super.getProperties(type));
+        return properties;
+    }
+
+    /**
+     * Represents a single bean property, pruning values that carry no information.
+     * @return The tuple built by the superclass, or null when the value is null,
+     *         an empty sequence or an empty map.
+     */
+    @Override
+    protected NodeTuple representJavaBeanProperty(Object javaBean, Property property,
+                                                  Object propertyValue, Tag customTag) {
+        NodeTuple tuple = super.representJavaBeanProperty(javaBean, property, propertyValue, customTag);
+        Node valueNode = tuple.getValueNode();
+        if (Tag.NULL.equals(valueNode.getTag())) {
+            return null; // skip 'null' values
+        }
+        if (valueNode instanceof CollectionNode) {
+            if (Tag.SEQ.equals(valueNode.getTag())
+                    && ((SequenceNode) valueNode).getValue().isEmpty()) {
+                return null; // skip empty lists
+            }
+            if (Tag.MAP.equals(valueNode.getTag())
+                    && ((MappingNode) valueNode).getValue().isEmpty()) {
+                return null; // skip empty maps
+            }
+        }
+        return tuple;
+    }
+
+    /**
+     * Writes a File as a string scalar containing File.getPath().
+     * Not static because it delegates to the enclosing representer's representScalar.
+     */
+    private class RepresentFile implements Represent {
+        @Override
+        public Node representData(Object o) {
+            return StingYamlRepresenter.this.representScalar(Tag.STR, ((File)o).getPath());
+        }
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/yaml/YamlUtils.java b/java/src/org/broadinstitute/sting/utils/yaml/YamlUtils.java
new file mode 100644
index 000000000..715c71efc
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/yaml/YamlUtils.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.yaml;
+
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.yaml.snakeyaml.DumperOptions;
+import org.yaml.snakeyaml.Yaml;
+import org.yaml.snakeyaml.constructor.Constructor;
+import org.yaml.snakeyaml.nodes.Tag;
+import org.yaml.snakeyaml.representer.Representer;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+
+/**
+ * A collection of utilities for operating on YAML.
+ * Uses the FLOW style of writing YAML, versus the BLOCK style.
+ * By default uses a representer that prunes empty lists and maps.
+ */
+public class YamlUtils {
+    private static final Representer representer = new StingYamlRepresenter();
+    private static final DumperOptions options = new DumperOptions();
+
+    static {
+        options.setCanonical(false);
+        options.setExplicitRoot(Tag.MAP);
+        options.setDefaultFlowStyle(DumperOptions.FlowStyle.FLOW);
+        options.setPrettyFlow(true);
+    }
+
+    /**
+     * Serialize an object to the file system.
+     * @param o Object to serialize.
+     * @param file Path to write the serialized YAML.
+     */
+    public static void dump(Object o, File file) {
+        dump(o, file, representer);
+    }
+
+    /**
+     * Serialize an object to the file system.
+     * @param o Object to serialize.
+     * @param file Path to write the serialized YAML.
+     * @param representer Custom representer with rules on how to serialize YAML.
+     * @throws UserException.CouldNotCreateOutputFile If the file cannot be opened, written or closed.
+     */
+    public static void dump(Object o, File file, Representer representer) {
+        Constructor constructor = new Constructor(o.getClass());
+        Yaml yaml = new Yaml(constructor, representer, options);
+        try {
+            FileWriter writer = new FileWriter(file);
+            try {
+                yaml.dump(o, writer);
+            } finally {
+                // Always close so buffered output is flushed and the file handle is released.
+                writer.close();
+            }
+        } catch (IOException ioe) {
+            throw new UserException.CouldNotCreateOutputFile(file, ioe);
+        }
+    }
+
+    /**
+     * Deserialize an object from the file system.
+     * @param clazz Clazz to deserialize.
+     * @param file Path to read the deserialized YAML.
+     * @return Object deserialized from the file system.
+     */
+    public static <T> T load(Class<? extends T> clazz, File file) {
+        return load(clazz, file, representer);
+    }
+
+    /**
+     * Deserialize an object from the file system.
+     *
+     * NOTE(review): the YAML is parsed with a bean Constructor, which may allow the
+     * document to name other classes to instantiate; confirm that only trusted
+     * files are passed in.
+     *
+     * @param clazz Clazz to deserialize.
+     * @param file Path to read the deserialized YAML.
+     * @param representer Custom representer with rules on how to deserialize YAML.
+     * @return Object deserialized from the file system.
+     * @throws UserException.CouldNotReadInputFile If the file cannot be opened, read or closed.
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> T load(Class<? extends T> clazz, File file, Representer representer) {
+        Constructor constructor = new Constructor(clazz);
+        Yaml yaml = new Yaml(constructor, representer, options);
+        try {
+            FileReader reader = new FileReader(file);
+            try {
+                return (T) yaml.load(reader);
+            } finally {
+                // Always release the file handle, even when parsing fails.
+                reader.close();
+            }
+        } catch (IOException ioe) {
+            throw new UserException.CouldNotReadInputFile(file, ioe);
+        }
+    }
+}
diff --git a/java/test/org/broadinstitute/sting/datasources/pipeline/PipelineUnitTest.java b/java/test/org/broadinstitute/sting/datasources/pipeline/PipelineUnitTest.java
new file mode 100644
index 000000000..7a32bcea5
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/datasources/pipeline/PipelineUnitTest.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.datasources.pipeline;
+
+import org.broadinstitute.sting.utils.yaml.YamlUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.Map;
+
+/**
+ * Round-trips a populated Pipeline through YamlUtils and checks that the
+ * loaded copy matches the original.
+ */
+public class PipelineUnitTest {
+    @Test
+    public void testDumpAndLoad() throws Exception {
+        Pipeline pipeline = new Pipeline();
+
+        pipeline.getProject().setName("PRJ_NAME");
+        pipeline.getProject().setReferenceFile(new File("my.fasta"));
+        pipeline.getProject().setDbsnpFile(new File("my.dbsnp"));
+        pipeline.getProject().getTags().put("testProjectTag", "project value here");
+
+        PipelineSample sample = new PipelineSample();
+        sample.setId("SMP_ID");
+        sample.getBamFiles().put("recalibrated", new File("recalibrated.bam"));
+        sample.getBamFiles().put("cleaned", new File("/absolute/path/to/cleaned.bam"));
+        sample.getTags().put("testSampleTag", "sample value here");
+
+        pipeline.getSamples().add(sample);
+
+        File file = File.createTempFile("testDumpAndLoad", ".yaml");
+        // Do not leave the temporary YAML behind after the test JVM exits.
+        file.deleteOnExit();
+        YamlUtils.dump(pipeline, file);
+        Pipeline pipelineLoad = YamlUtils.load(Pipeline.class, file);
+
+        Assert.assertEquals(pipeline.getProject().getName(), pipelineLoad.getProject().getName());
+        Assert.assertEquals(pipeline.getProject().getReferenceFile(), pipelineLoad.getProject().getReferenceFile());
+        Assert.assertEquals(pipeline.getProject().getIntervalList(), pipelineLoad.getProject().getIntervalList());
+        Assert.assertEquals(pipeline.getProject().getDbsnpFile(), pipelineLoad.getProject().getDbsnpFile());
+
+        // Each expected entry must be looked up in the LOADED object; comparing the
+        // original against itself can never fail.
+        Assert.assertEquals(pipeline.getProject().getTags().size(), pipelineLoad.getProject().getTags().size());
+        for (Map.Entry<String, String> entry : pipeline.getProject().getTags().entrySet())
+            Assert.assertEquals(entry.getValue(), pipelineLoad.getProject().getTags().get(entry.getKey()));
+
+        Assert.assertEquals(pipeline.getSamples().size(), pipelineLoad.getSamples().size());
+        for (int i = 0; i < pipeline.getSamples().size(); i++) {
+            PipelineSample pipelineSample = pipeline.getSamples().get(i);
+            PipelineSample pipelineLoadSample = pipelineLoad.getSamples().get(i);
+
+            Assert.assertEquals(pipelineSample.getId(), pipelineLoadSample.getId());
+
+            Assert.assertEquals(pipelineSample.getBamFiles().size(), pipelineLoadSample.getBamFiles().size());
+            for (Map.Entry<String, File> entry : pipelineSample.getBamFiles().entrySet())
+                Assert.assertEquals(entry.getValue(), pipelineLoadSample.getBamFiles().get(entry.getKey()));
+
+            Assert.assertEquals(pipelineSample.getTags().size(), pipelineLoadSample.getTags().size());
+            for (Map.Entry<String, String> entry : pipelineSample.getTags().entrySet())
+                Assert.assertEquals(entry.getValue(), pipelineLoadSample.getTags().get(entry.getKey()));
+        }
+    }
+}
diff --git a/scala/qscript/fullCallingPipeline.q b/scala/qscript/fullCallingPipeline.q
index 402e9dc25..05fccf16c 100755
--- a/scala/qscript/fullCallingPipeline.q
+++ b/scala/qscript/fullCallingPipeline.q
@@ -1,24 +1,24 @@
+import org.broadinstitute.sting.datasources.pipeline.Pipeline
import org.broadinstitute.sting.gatk.DownsampleType
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
import org.broadinstitute.sting.queue.extensions.samtools._
import org.broadinstitute.sting.queue.{QException, QScript}
+import collection.JavaConversions._
+import org.broadinstitute.sting.utils.yaml.YamlUtils
class fullCallingPipeline extends QScript {
qscript =>
- @Argument(doc = "reference", shortName="R")
- var reference: File = _
-
@Argument(doc="contigIntervals", shortName="contigIntervals")
var contigIntervals: File = _
@Argument(doc="numContigs", shortName="numContigs")
var numContigs: Int = _
- @Argument(doc="project", shortName="project")
- var project: String = _
+ @Argument(fullName="pipeline_yaml", shortName="PY", doc="Pipeline YAML file")
+ var pipelineYamlFile: File = _
@Input(doc="trigger", shortName="trigger", required=false)
var trigger: File = _
@@ -35,12 +35,6 @@ class fullCallingPipeline extends QScript {
@Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/")
var picardFixMatesJar: File = _
- @Input(doc="intervals")
- var intervals: File = _
-
- @Input(doc="bam files", shortName="I")
- var bamFiles: List[File] = Nil
-
@Input(doc="gatk jar")
var gatkJar: File = _
@@ -50,9 +44,6 @@ class fullCallingPipeline extends QScript {
@Input(doc="SNP cluster filter -- window size",shortName="snpClusterWindow",required=false)
var snpClusterWindow = 7
- @Input(doc="dbSNP version",shortName="D")
- var dbSNP: File = _
-
@Input(doc="target titv for recalibration",shortName="titv",required=false)
var target_titv = 2.1
@@ -65,11 +56,12 @@ class fullCallingPipeline extends QScript {
@Input(doc="Number of jobs to scatter indel genotyper",shortName="indelScatter",required=false)
var num_indel_scatter_jobs = 5
+ private var pipeline: Pipeline = _
trait CommandLineGATKArgs extends CommandLineGATK {
- this.intervals = qscript.intervals
+ this.intervals = qscript.pipeline.getProject.getIntervalList
this.jarFile = qscript.gatkJar
- this.reference_sequence = qscript.reference
+ this.reference_sequence = qscript.pipeline.getProject.getReferenceFile
}
@@ -77,20 +69,26 @@ class fullCallingPipeline extends QScript {
def script = {
- val projectBase: String = qscript.project
+ pipeline = YamlUtils.load(classOf[Pipeline], qscript.pipelineYamlFile)
+ val projectBase: String = qscript.pipeline.getProject.getName
val cleanedBase: String = projectBase + ".cleaned"
val uncleanedBase: String = projectBase + ".uncleaned"
// there are commands that use all the bam files
- var cleanBamFiles = List.empty[File]
+ val recalibratedSamples = qscript.pipeline.getSamples
+ .filter(_.getBamFiles.contains("recalibrated"))
- for ( bam <- qscript.bamFiles ) {
+ for ( sample <- recalibratedSamples ) {
// put unclean bams in unclean genotypers
// in advance, create the extension files
+ val bam = sample.getBamFiles.get("recalibrated")
+ if (!sample.getBamFiles.contains("cleaned"))
+ sample.getBamFiles.put("cleaned", swapExt(bam,"bam","cleaned.bam"))
+ val cleaned_bam = sample.getBamFiles.get("cleaned")
+
val indel_targets = swapExt(bam,"bam","realigner_targets.interval_list")
- val cleaned_bam = swapExt(bam,"bam","cleaned.bam") // note-- the scatter is in the definition itself
// create the cleaning commands
@@ -131,8 +129,6 @@ class fullCallingPipeline extends QScript {
gather.jarFile = qscript.picardFixMatesJar
// Don't pass this AS=true to fix mates!
gather.assumeSorted = None
- case (gather: SimpleTextGatherFunction, _) =>
- throw new QException("Cannot text-gather a realignment job")
}
} else {
realigner.out = swapExt(bam,"bam","unfixed.cleaned.bam")
@@ -149,10 +145,6 @@ class fullCallingPipeline extends QScript {
var samtoolsindex = new SamtoolsIndexFunction
samtoolsindex.bamFile = cleaned_bam
- // put clean bams in clean genotypers
-
- cleanBamFiles :+= cleaned_bam
-
// COMMENT THIS NEXT BLOCK TO SKIP CLEANING
if ( realigner.scatterCount > 1 )
add(targetCreator,realigner,samtoolsindex)
@@ -160,8 +152,17 @@ class fullCallingPipeline extends QScript {
add(targetCreator,realigner,fixMates,samtoolsindex)
}
+ val recalibratedBamFiles = recalibratedSamples
+ .map(_.getBamFiles.get("recalibrated"))
+ .toList
+
+ val cleanBamFiles = qscript.pipeline.getSamples
+ .filter(_.getBamFiles.contains("cleaned"))
+ .map(_.getBamFiles.get("cleaned"))
+ .toList
+
// actually make calls
- endToEnd(uncleanedBase,qscript.bamFiles)
+ endToEnd(uncleanedBase,recalibratedBamFiles)
// COMMENT THIS NEXT LINE TO AVOID CALLING ON CLEANED FILES
endToEnd(cleanedBase,cleanBamFiles)
}
@@ -218,7 +219,7 @@ class fullCallingPipeline extends QScript {
loopNo += 1
}
val mergeIndels = new CombineVariants with CommandLineGATKArgs
- mergeIndels.out = new TaggedFile(qscript.project+".indels.vcf","vcf")
+ mergeIndels.out = new TaggedFile(qscript.pipeline.getProject.getName+".indels.vcf","vcf")
mergeIndels.genotypemergeoption = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.GenotypeMergeType.UNIQUIFY)
mergeIndels.priority = priority
mergeIndels.variantmergeoption = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION)
@@ -259,7 +260,7 @@ class fullCallingPipeline extends QScript {
// todo -- args for resources (properties file)
val clusters = new GenerateVariantClusters with CommandLineGATKArgs
clusters.rodBind :+= RodBind("input", "VCF", masker.out)
- clusters.DBSNP = qscript.dbSNP
+ clusters.DBSNP = qscript.pipeline.getProject.getDbsnpFile
val clusters_clusterFile = swapExt(new File(snps.out.getAbsolutePath),".vcf",".cluster")
clusters.clusterFile = clusters_clusterFile
clusters.memoryLimit = Some(4)
@@ -271,7 +272,7 @@ class fullCallingPipeline extends QScript {
// 3.ii apply gaussian clusters to the masked vcf
val recalibrate = new VariantRecalibrator with CommandLineGATKArgs
recalibrate.clusterFile = clusters.clusterFile
- recalibrate.DBSNP = qscript.dbSNP
+ recalibrate.DBSNP = qscript.pipeline.getProject.getDbsnpFile
recalibrate.rodBind :+= RodBind("input", "VCF", masker.out)
recalibrate.out = swapExt(masker.out,".vcf",".recalibrated.vcf")
recalibrate.target_titv = qscript.target_titv
diff --git a/shell/getFirehosePipelineYaml.sh b/shell/getFirehosePipelineYaml.sh
new file mode 100644
index 000000000..6966c2785
--- /dev/null
+++ b/shell/getFirehosePipelineYaml.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+# Downloads a set of samples from Firehose using the Firehose Test Harness and awk to generate a YAML file.
+# Must run under bash: the rest of the script uses arrays and `let`, which plain POSIX sh does not provide.
+
+# The only argument is the id of the sample set to export.
+ENTITY_SET_ID=$1
+ENTITY_SET_TYPE=Sample_Set
+ENTITY_TYPE=Sample
+
+# Refuse to run without a sample set id and explain the expected argument.
+if [ -z "$ENTITY_SET_ID" ]; then
+    echo "Usage: $0 <entity_set_id>" >&2
+    exit 1
+fi
+
+# Firehose variables
+# NOTE(review): FIREHOSE_HOST and FIREHOSE_PORT are not referenced anywhere else in this script.
+
+FIREHOSE_SOURCE_HOME=/humgen/gsa-firehose/firehose/source
+CGA_HOME=$FIREHOSE_SOURCE_HOME/CancerGenomeAnalysis
+FIREHOSE_TEST_HARNESS="python $CGA_HOME/analysis_pipeline/scripts/firehose_test_harness.py"
+FIREHOSE_HOST=firehose
+FIREHOSE_PORT=8080
+FIREHOSE_DOMAIN=gsa
+FIREHOSE_WORKSPACE=trunk
+
+# YAML file to write (named after the sample set, created in the current directory)
+
+PIPELINE_YAML_FILE=$ENTITY_SET_ID.yaml
+
+# Annotations to pull down from Firehose
+# The order matters: the awk templates below refer to these positionally as $1 .. $7.
+
+FIREHOSE_ANNOTATIONS=(reference_file dbsnp_file interval_list \
+ sample_id recalibrated_bam_file squid_project collaborator_id)
+
+# YAML templates
+# Each template is a complete awk printf argument list (format string followed by
+# the field references) that is spliced into the awk program at the bottom.
+# The project template uses fields $1-$3, the sample template uses fields $4-$7.
+
+PROJECT_YAML_TEMPLATE='" \
+ project: { \
+ name: '"$ENTITY_SET_ID"', \
+ referenceFile: %s, \
+ dbsnpFile: %s, \
+ intervalList: %s \
+ },", $1, $2, $3'
+
+SAMPLE_YAML_TEMPLATE='" \
+ { \
+ id: %s, \
+ bamFiles: { recalibrated: %s }, \
+ tags: { \
+ SQUIDProject: %s, \
+ CollaboratorID: %s \
+ } \
+ }", $4, $5, $6, $7'
+
+# NOTE(review): TAB is presumably meant to hold a literal tab character -- confirm it
+# has not been converted to a space.
+index=0
+count=${#FIREHOSE_ANNOTATIONS[@]}
+FIREHOSE_VARIABLES=""
+TAB=' '
+
+# Build the tab separated list of firehose arguments
+# Each annotation name is wrapped as a literal ${name} placeholder.
+
+while [ "$index" -lt "$count" ]; do
+ if [ "$FIREHOSE_VARIABLES" != "" ]; then
+ FIREHOSE_VARIABLES=$FIREHOSE_VARIABLES$TAB
+ fi
+ FIREHOSE_VARIABLES=$FIREHOSE_VARIABLES'${'${FIREHOSE_ANNOTATIONS[$index]}'}'
+ let "index = $index + 1"
+done
+
+# Pass the test harness an echo command containing the placeholders built above.
+# The harness presumably writes firehose-populated-commands.sh with the placeholders
+# filled in for each sample (TODO confirm); that file is sourced below only if the
+# harness succeeds.
+$FIREHOSE_TEST_HARNESS \
+ -d $FIREHOSE_DOMAIN -w $FIREHOSE_WORKSPACE \
+ -t $ENTITY_TYPE -f $ENTITY_SET_ID -y $ENTITY_SET_TYPE \
+ "echo '$FIREHOSE_VARIABLES'" && \
+\
+# Generate yaml from firehose output
+. firehose-populated-commands.sh | awk '
+BEGIN {
+ printf "{"
+}
+{
+ if (NR == 1) {
+ printf '"$PROJECT_YAML_TEMPLATE"'
+ printf "\n samples: ["
+ } else {
+ printf ","
+ }
+ printf '"$SAMPLE_YAML_TEMPLATE"'
+}
+END {
+ if (NR > 0)
+ printf "\n ]"
+ print "\n}"
+}' > $PIPELINE_YAML_FILE