Updated from SnakeYAML 1.6 to 1.7.
Added a pipeline java bean and YAML utility to serialize java beans. Added a getFirehosePipelineYaml.sh that can pull firehose data into the pipeline yaml file format. Updated the fullCallingPipeline.q to begin using the pipeline yaml file format for bams and reference. More changes to come as this code gets tested out in the fullCallingPipeline. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4329 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d9b8fa2acc
commit
20b38b38f3
2
ivy.xml
2
ivy.xml
|
|
@ -17,7 +17,7 @@
|
|||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
||||
<dependency org="org.yaml" name="snakeyaml" rev="1.6"/>
|
||||
<dependency org="org.yaml" name="snakeyaml" rev="1.7"/>
|
||||
|
||||
<!-- Dependencies for reflections mvn repository -->
|
||||
<dependency org="org.reflections" name="reflections" rev="0.9.5-RC2"/>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Java bean for storing a list of samples for a pipeline.
|
||||
*
|
||||
* NOTE: This class is used in a very similar way to the classes in
|
||||
* org.broadinstitute.sting.gatk.datasources.sample.
|
||||
*
|
||||
* Both store / load sample information from the file system as YAML.
|
||||
*
|
||||
* This package will likely be refactored to share common functionality
|
||||
* with the other at a future date as requirements coalesce.
|
||||
*
|
||||
* - kshakir September 22, 2010
|
||||
*/
|
||||
public class Pipeline {
|
||||
private PipelineProject project = new PipelineProject();
|
||||
private List<PipelineSample> samples = new ArrayList<PipelineSample>();
|
||||
|
||||
public PipelineProject getProject() {
|
||||
return project;
|
||||
}
|
||||
|
||||
public void setProject(PipelineProject project) {
|
||||
this.project = project;
|
||||
}
|
||||
|
||||
public List<PipelineSample> getSamples() {
|
||||
return samples;
|
||||
}
|
||||
|
||||
public void setSamples(List<PipelineSample> samples) {
|
||||
this.samples = samples;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Java bean defining the project for a pipeline.
|
||||
*/
|
||||
public class PipelineProject {
    // NOTE: keep the field names and their declaration order as they are.
    // StingYamlRepresenter orders the serialized properties by the order of
    // the declared fields of the bean.

    /** Name of the project. */
    private String name;

    /** Reference file for the project. */
    private File referenceFile;

    /** Interval list file for the project. */
    private File intervalList;

    /** dbSNP file for the project. */
    private File dbsnpFile;

    /** Free form key / value tags, kept sorted by key; starts out empty. */
    private Map<String, String> tags = new TreeMap<String, String>();

    /** @return The project name, or null when it was never set. */
    public String getName() {
        return this.name;
    }

    /** @param name The project name. */
    public void setName(String name) {
        this.name = name;
    }

    /** @return The reference file, or null when it was never set. */
    public File getReferenceFile() {
        return this.referenceFile;
    }

    /** @param referenceFile The reference file. */
    public void setReferenceFile(File referenceFile) {
        this.referenceFile = referenceFile;
    }

    /** @return The interval list file, or null when it was never set. */
    public File getIntervalList() {
        return this.intervalList;
    }

    /** @param intervalList The interval list file. */
    public void setIntervalList(File intervalList) {
        this.intervalList = intervalList;
    }

    /** @return The dbSNP file, or null when it was never set. */
    public File getDbsnpFile() {
        return this.dbsnpFile;
    }

    /** @param dbsnpFile The dbSNP file. */
    public void setDbsnpFile(File dbsnpFile) {
        this.dbsnpFile = dbsnpFile;
    }

    /** @return The live map of tags; changes to the returned map change this project. */
    public Map<String, String> getTags() {
        return this.tags;
    }

    /** @param tags The map of tags. Stored as is, no copy is made. */
    public void setTags(Map<String, String> tags) {
        this.tags = tags;
    }
}
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Java bean defining a sample for a pipeline.
|
||||
*/
|
||||
public class PipelineSample {
    // NOTE: keep the field names and their declaration order as they are.
    // StingYamlRepresenter orders the serialized properties by the order of
    // the declared fields of the bean.

    /** Identifier of the sample. */
    private String id;

    /** BAM files of the sample keyed by a label, kept sorted by key; starts out empty. */
    private Map<String, File> bamFiles = new TreeMap<String, File>();

    /** Free form key / value tags, kept sorted by key; starts out empty. */
    private Map<String, String> tags = new TreeMap<String, String>();

    /** @return The sample id, or null when it was never set. */
    public String getId() {
        return this.id;
    }

    /** @param id The sample id. */
    public void setId(String id) {
        this.id = id;
    }

    /** @return The live map of BAM files; changes to the returned map change this sample. */
    public Map<String, File> getBamFiles() {
        return this.bamFiles;
    }

    /** @param bamFiles The map of BAM files. Stored as is, no copy is made. */
    public void setBamFiles(Map<String, File> bamFiles) {
        this.bamFiles = bamFiles;
    }

    /** @return The live map of tags; changes to the returned map change this sample. */
    public Map<String, String> getTags() {
        return this.tags;
    }

    /** @param tags The map of tags. Stored as is, no copy is made. */
    public void setTags(Map<String, String> tags) {
        this.tags = tags;
    }
}
|
||||
|
|
@ -6,7 +6,6 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.yaml.snakeyaml.Loader;
|
||||
import org.yaml.snakeyaml.TypeDescription;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
import org.yaml.snakeyaml.constructor.Constructor;
|
||||
|
|
@ -115,8 +114,7 @@ public class SampleDataSource {
|
|||
desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
|
||||
desc.putListPropertyType("sampleAliases", SampleAlias.class);
|
||||
con.addTypeDescription(desc);
|
||||
Loader loader = new Loader(con);
|
||||
Yaml yaml = new Yaml(loader);
|
||||
Yaml yaml = new Yaml(con);
|
||||
|
||||
// SampleFileParser stores an object representation of a sample file - this is what we'll parse
|
||||
SampleFileParser parser;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.yaml;
|
||||
|
||||
import org.yaml.snakeyaml.introspector.Property;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Orders properties based on the order of the fields in the Java Bean.
|
||||
*/
|
||||
class FieldOrderComparator implements Comparator<Property> {
|
||||
private final List<String> propertyOrder;
|
||||
|
||||
public FieldOrderComparator(Class<?> clazz) {
|
||||
propertyOrder = new ArrayList<String>();
|
||||
for (Field field : clazz.getDeclaredFields())
|
||||
propertyOrder.add(field.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(Property one, Property two) {
|
||||
Integer index1 = propertyOrder.indexOf(one.getName());
|
||||
Integer index2 = propertyOrder.indexOf(two.getName());
|
||||
return index1.compareTo(index2);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.yaml;
|
||||
|
||||
import org.yaml.snakeyaml.introspector.Property;
|
||||
import org.yaml.snakeyaml.nodes.*;
|
||||
import org.yaml.snakeyaml.representer.Represent;
|
||||
import org.yaml.snakeyaml.representer.Representer;
|
||||
|
||||
import java.beans.IntrospectionException;
|
||||
import java.io.File;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* A representer with Sting preferred settings.
|
||||
* - Fields are ordered in the order of the class declaration, instead of alphabetically.
|
||||
* - Empty maps and sequences are not output.
|
||||
* - Files are written out as their path strings (as given; they are not converted to absolute paths).
|
||||
*/
|
||||
public class StingYamlRepresenter extends Representer {
|
||||
|
||||
public StingYamlRepresenter() {
|
||||
super();
|
||||
this.representers.put(File.class, new RepresentFile());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Set<Property> getProperties(Class<?> type) throws IntrospectionException {
|
||||
TreeSet<Property> properties = new TreeSet<Property>(new FieldOrderComparator(type));
|
||||
properties.addAll(super.getProperties(type));
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NodeTuple representJavaBeanProperty(Object javaBean, Property property,
|
||||
Object propertyValue, Tag customTag) {
|
||||
NodeTuple tuple = super.representJavaBeanProperty(javaBean, property, propertyValue, customTag);
|
||||
Node valueNode = tuple.getValueNode();
|
||||
if (Tag.NULL.equals(valueNode.getTag())) {
|
||||
return null;// skip 'null' values
|
||||
}
|
||||
if (valueNode instanceof CollectionNode) {
|
||||
if (Tag.SEQ.equals(valueNode.getTag())) {
|
||||
SequenceNode seq = (SequenceNode) valueNode;
|
||||
if (seq.getValue().isEmpty()) {
|
||||
return null;// skip empty lists
|
||||
}
|
||||
}
|
||||
if (Tag.MAP.equals(valueNode.getTag())) {
|
||||
MappingNode seq = (MappingNode) valueNode;
|
||||
if (seq.getValue().isEmpty()) {
|
||||
return null;// skip empty maps
|
||||
}
|
||||
}
|
||||
}
|
||||
return tuple;
|
||||
}
|
||||
|
||||
private class RepresentFile implements Represent {
|
||||
@Override
|
||||
public Node representData(Object o) {
|
||||
return StingYamlRepresenter.this.representScalar(Tag.STR, ((File)o).getPath());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.yaml;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.yaml.snakeyaml.DumperOptions;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
import org.yaml.snakeyaml.constructor.Constructor;
|
||||
import org.yaml.snakeyaml.nodes.Tag;
|
||||
import org.yaml.snakeyaml.representer.Representer;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A collection of utilities for operating on YAML.
|
||||
* Uses the FLOW style of writing YAML, versus the BLOCK style.
|
||||
* By default uses a representer that prunes empty lists and maps.
|
||||
*/
|
||||
public class YamlUtils {
|
||||
private static Representer representer = new StingYamlRepresenter();
|
||||
private static DumperOptions options = new DumperOptions();
|
||||
|
||||
static {
|
||||
options.setCanonical(false);
|
||||
options.setExplicitRoot(Tag.MAP);
|
||||
options.setDefaultFlowStyle(DumperOptions.FlowStyle.FLOW);
|
||||
options.setPrettyFlow(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize an object to the file system.
|
||||
* @param o Object to serialize.
|
||||
* @param file Path to write the serialized YAML.
|
||||
*/
|
||||
public static void dump(Object o, File file) {
|
||||
dump(o, file, representer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize an object to the file system.
|
||||
* @param o Object to serialize.
|
||||
* @param file Path to write the serialized YAML.
|
||||
* @param representer Custom representer with rules on how to serialize YAML.
|
||||
*/
|
||||
public static void dump(Object o, File file, Representer representer) {
|
||||
Constructor constructor = new Constructor(o.getClass());
|
||||
Yaml yaml = new Yaml(constructor, representer, options);
|
||||
try {
|
||||
yaml.dump(o, new FileWriter(file));
|
||||
} catch (IOException ioe) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deserialize an object from the file system.
|
||||
* @param clazz Clazz to deserialize.
|
||||
* @param file Path to read the deserialized YAML.
|
||||
* @return Object deserialized from the file system.
|
||||
*/
|
||||
public static <T> T load(Class<? extends T> clazz, File file) {
|
||||
return load(clazz, file, representer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deserialize an object from the file system.
|
||||
* @param clazz Clazz to deserialize.
|
||||
* @param file Path to read the deserialized YAML.
|
||||
* @param representer Custom representer with rules on how to deserialize YAML.
|
||||
* @return Object deserialized from the file system.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static <T> T load(Class<? extends T> clazz, File file, Representer representer) {
|
||||
Constructor constructor = new Constructor(clazz);
|
||||
Yaml yaml = new Yaml(constructor, representer, options);
|
||||
try {
|
||||
return (T) yaml.load(new FileReader(file));
|
||||
} catch (IOException ioe) {
|
||||
throw new UserException.CouldNotReadInputFile(file, ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
|
||||
import org.broadinstitute.sting.utils.yaml.YamlUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
||||
public class PipelineUnitTest {
|
||||
@Test
|
||||
public void testDumpAndLoad() throws Exception {
|
||||
Pipeline pipeline = new Pipeline();
|
||||
|
||||
pipeline.getProject().setName("PRJ_NAME");
|
||||
pipeline.getProject().setReferenceFile(new File("my.fasta"));
|
||||
pipeline.getProject().setDbsnpFile(new File("my.dbsnp"));
|
||||
pipeline.getProject().getTags().put("testProjectTag", "project value here");
|
||||
|
||||
PipelineSample sample = new PipelineSample();
|
||||
sample.setId("SMP_ID");
|
||||
sample.getBamFiles().put("recalibrated", new File("recalibrated.bam"));
|
||||
sample.getBamFiles().put("cleaned", new File("/absolute/path/to/cleaned.bam"));
|
||||
sample.getTags().put("testSampleTag", "sample value here");
|
||||
|
||||
pipeline.getSamples().add(sample);
|
||||
|
||||
File file = File.createTempFile("testDumpAndLoad", ".yaml");
|
||||
YamlUtils.dump(pipeline, file);
|
||||
Pipeline pipelineLoad = YamlUtils.load(Pipeline.class, file);
|
||||
|
||||
Assert.assertEquals(pipeline.getProject().getName(), pipelineLoad.getProject().getName());
|
||||
Assert.assertEquals(pipeline.getProject().getReferenceFile(), pipelineLoad.getProject().getReferenceFile());
|
||||
Assert.assertEquals(pipeline.getProject().getIntervalList(), pipelineLoad.getProject().getIntervalList());
|
||||
Assert.assertEquals(pipeline.getProject().getDbsnpFile(), pipelineLoad.getProject().getDbsnpFile());
|
||||
|
||||
Assert.assertEquals(pipeline.getProject().getTags().size(), pipelineLoad.getProject().getTags().size());
|
||||
for (Map.Entry<String, String> entry : pipeline.getProject().getTags().entrySet())
|
||||
Assert.assertEquals(entry.getValue(), pipeline.getProject().getTags().get(entry.getKey()));
|
||||
|
||||
Assert.assertEquals(pipeline.getSamples().size(), pipelineLoad.getSamples().size());
|
||||
for (int i = 0; i < pipeline.getSamples().size(); i++) {
|
||||
PipelineSample pipelineSample = pipeline.getSamples().get(i);
|
||||
PipelineSample pipelineLoadSample = pipelineLoad.getSamples().get(i);
|
||||
|
||||
Assert.assertEquals(pipelineSample.getId(), pipelineLoadSample.getId());
|
||||
|
||||
Assert.assertEquals(pipelineSample.getBamFiles().size(), pipelineLoadSample.getBamFiles().size());
|
||||
for (Map.Entry<String, File> entry : pipelineSample.getBamFiles().entrySet())
|
||||
Assert.assertEquals(entry.getValue(), pipelineSample.getBamFiles().get(entry.getKey()));
|
||||
|
||||
Assert.assertEquals(pipelineSample.getTags().size(), pipelineLoadSample.getTags().size());
|
||||
for (Map.Entry<String, String> entry : pipelineSample.getTags().entrySet())
|
||||
Assert.assertEquals(entry.getValue(), pipelineSample.getTags().get(entry.getKey()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,24 +1,24 @@
|
|||
import org.broadinstitute.sting.datasources.pipeline.Pipeline
|
||||
import org.broadinstitute.sting.gatk.DownsampleType
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
|
||||
import org.broadinstitute.sting.queue.extensions.samtools._
|
||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||
|
||||
class fullCallingPipeline extends QScript {
|
||||
qscript =>
|
||||
|
||||
@Argument(doc = "reference", shortName="R")
|
||||
var reference: File = _
|
||||
|
||||
@Argument(doc="contigIntervals", shortName="contigIntervals")
|
||||
var contigIntervals: File = _
|
||||
|
||||
@Argument(doc="numContigs", shortName="numContigs")
|
||||
var numContigs: Int = _
|
||||
|
||||
@Argument(doc="project", shortName="project")
|
||||
var project: String = _
|
||||
@Argument(fullName="pipeline_yaml", shortName="PY", doc="Pipeline YAML file")
|
||||
var pipelineYamlFile: File = _
|
||||
|
||||
@Input(doc="trigger", shortName="trigger", required=false)
|
||||
var trigger: File = _
|
||||
|
|
@ -35,12 +35,6 @@ class fullCallingPipeline extends QScript {
|
|||
@Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/")
|
||||
var picardFixMatesJar: File = _
|
||||
|
||||
@Input(doc="intervals")
|
||||
var intervals: File = _
|
||||
|
||||
@Input(doc="bam files", shortName="I")
|
||||
var bamFiles: List[File] = Nil
|
||||
|
||||
@Input(doc="gatk jar")
|
||||
var gatkJar: File = _
|
||||
|
||||
|
|
@ -50,9 +44,6 @@ class fullCallingPipeline extends QScript {
|
|||
@Input(doc="SNP cluster filter -- window size",shortName="snpClusterWindow",required=false)
|
||||
var snpClusterWindow = 7
|
||||
|
||||
@Input(doc="dbSNP version",shortName="D")
|
||||
var dbSNP: File = _
|
||||
|
||||
@Input(doc="target titv for recalibration",shortName="titv",required=false)
|
||||
var target_titv = 2.1
|
||||
|
||||
|
|
@ -65,11 +56,12 @@ class fullCallingPipeline extends QScript {
|
|||
@Input(doc="Number of jobs to scatter indel genotyper",shortName="indelScatter",required=false)
|
||||
var num_indel_scatter_jobs = 5
|
||||
|
||||
private var pipeline: Pipeline = _
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
this.intervals = qscript.intervals
|
||||
this.intervals = qscript.pipeline.getProject.getIntervalList
|
||||
this.jarFile = qscript.gatkJar
|
||||
this.reference_sequence = qscript.reference
|
||||
this.reference_sequence = qscript.pipeline.getProject.getReferenceFile
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -77,20 +69,26 @@ class fullCallingPipeline extends QScript {
|
|||
|
||||
|
||||
def script = {
|
||||
val projectBase: String = qscript.project
|
||||
pipeline = YamlUtils.load(classOf[Pipeline], qscript.pipelineYamlFile)
|
||||
val projectBase: String = qscript.pipeline.getProject.getName
|
||||
val cleanedBase: String = projectBase + ".cleaned"
|
||||
val uncleanedBase: String = projectBase + ".uncleaned"
|
||||
// there are commands that use all the bam files
|
||||
var cleanBamFiles = List.empty[File]
|
||||
val recalibratedSamples = qscript.pipeline.getSamples
|
||||
.filter(_.getBamFiles.contains("recalibrated"))
|
||||
|
||||
for ( bam <- qscript.bamFiles ) {
|
||||
for ( sample <- recalibratedSamples ) {
|
||||
|
||||
// put unclean bams in unclean genotypers
|
||||
|
||||
// in advance, create the extension files
|
||||
|
||||
val bam = sample.getBamFiles.get("recalibrated")
|
||||
if (!sample.getBamFiles.contains("cleaned"))
|
||||
sample.getBamFiles.put("cleaned", swapExt(bam,"bam","cleaned.bam"))
|
||||
val cleaned_bam = sample.getBamFiles.get("cleaned")
|
||||
|
||||
val indel_targets = swapExt(bam,"bam","realigner_targets.interval_list")
|
||||
val cleaned_bam = swapExt(bam,"bam","cleaned.bam") // note-- the scatter is in the definition itself
|
||||
|
||||
// create the cleaning commands
|
||||
|
||||
|
|
@ -131,8 +129,6 @@ class fullCallingPipeline extends QScript {
|
|||
gather.jarFile = qscript.picardFixMatesJar
|
||||
// Don't pass this AS=true to fix mates!
|
||||
gather.assumeSorted = None
|
||||
case (gather: SimpleTextGatherFunction, _) =>
|
||||
throw new QException("Cannot text-gather a realignment job")
|
||||
}
|
||||
} else {
|
||||
realigner.out = swapExt(bam,"bam","unfixed.cleaned.bam")
|
||||
|
|
@ -149,10 +145,6 @@ class fullCallingPipeline extends QScript {
|
|||
var samtoolsindex = new SamtoolsIndexFunction
|
||||
samtoolsindex.bamFile = cleaned_bam
|
||||
|
||||
// put clean bams in clean genotypers
|
||||
|
||||
cleanBamFiles :+= cleaned_bam
|
||||
|
||||
// COMMENT THIS NEXT BLOCK TO SKIP CLEANING
|
||||
if ( realigner.scatterCount > 1 )
|
||||
add(targetCreator,realigner,samtoolsindex)
|
||||
|
|
@ -160,8 +152,17 @@ class fullCallingPipeline extends QScript {
|
|||
add(targetCreator,realigner,fixMates,samtoolsindex)
|
||||
}
|
||||
|
||||
val recalibratedBamFiles = recalibratedSamples
|
||||
.map(_.getBamFiles.get("recalibrated"))
|
||||
.toList
|
||||
|
||||
val cleanBamFiles = qscript.pipeline.getSamples
|
||||
.filter(_.getBamFiles.contains("cleaned"))
|
||||
.map(_.getBamFiles.get("cleaned"))
|
||||
.toList
|
||||
|
||||
// actually make calls
|
||||
endToEnd(uncleanedBase,qscript.bamFiles)
|
||||
endToEnd(uncleanedBase,recalibratedBamFiles)
|
||||
// COMMENT THIS NEXT LINE TO AVOID CALLING ON CLEANED FILES
|
||||
endToEnd(cleanedBase,cleanBamFiles)
|
||||
}
|
||||
|
|
@ -218,7 +219,7 @@ class fullCallingPipeline extends QScript {
|
|||
loopNo += 1
|
||||
}
|
||||
val mergeIndels = new CombineVariants with CommandLineGATKArgs
|
||||
mergeIndels.out = new TaggedFile(qscript.project+".indels.vcf","vcf")
|
||||
mergeIndels.out = new TaggedFile(qscript.pipeline.getProject.getName+".indels.vcf","vcf")
|
||||
mergeIndels.genotypemergeoption = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.GenotypeMergeType.UNIQUIFY)
|
||||
mergeIndels.priority = priority
|
||||
mergeIndels.variantmergeoption = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION)
|
||||
|
|
@ -259,7 +260,7 @@ class fullCallingPipeline extends QScript {
|
|||
// todo -- args for resources (properties file)
|
||||
val clusters = new GenerateVariantClusters with CommandLineGATKArgs
|
||||
clusters.rodBind :+= RodBind("input", "VCF", masker.out)
|
||||
clusters.DBSNP = qscript.dbSNP
|
||||
clusters.DBSNP = qscript.pipeline.getProject.getDbsnpFile
|
||||
val clusters_clusterFile = swapExt(new File(snps.out.getAbsolutePath),".vcf",".cluster")
|
||||
clusters.clusterFile = clusters_clusterFile
|
||||
clusters.memoryLimit = Some(4)
|
||||
|
|
@ -271,7 +272,7 @@ class fullCallingPipeline extends QScript {
|
|||
// 3.ii apply gaussian clusters to the masked vcf
|
||||
val recalibrate = new VariantRecalibrator with CommandLineGATKArgs
|
||||
recalibrate.clusterFile = clusters.clusterFile
|
||||
recalibrate.DBSNP = qscript.dbSNP
|
||||
recalibrate.DBSNP = qscript.pipeline.getProject.getDbsnpFile
|
||||
recalibrate.rodBind :+= RodBind("input", "VCF", masker.out)
|
||||
recalibrate.out = swapExt(masker.out,".vcf",".recalibrated.vcf")
|
||||
recalibrate.target_titv = qscript.target_titv
|
||||
|
|
|
|||
|
|
@ -0,0 +1,92 @@
|
|||
#!/bin/bash

# Downloads a set of samples from Firehose using the Firehose Test Harness and awk to generate a YAML file.
#
# Usage: <this script> <Sample_Set_Name>
#
# NOTE: this script must run under bash, not a plain POSIX sh: further down it
# uses shell arrays and 'let', which are not available in every /bin/sh.

# Name of the sample set to pull down, taken from the first command line argument.
ENTITY_SET_ID=$1
ENTITY_SET_TYPE=Sample_Set
ENTITY_TYPE=Sample

# Without a sample set name there is nothing to query: print usage to stderr and stop.
if [ -z "$ENTITY_SET_ID" ]; then
    echo "Usage: $0 <Sample_Set_Name>" >&2
    exit 1
fi
|
||||
|
||||
# Firehose variables
|
||||
|
||||
FIREHOSE_SOURCE_HOME=/humgen/gsa-firehose/firehose/source
|
||||
CGA_HOME=$FIREHOSE_SOURCE_HOME/CancerGenomeAnalysis
|
||||
FIREHOSE_TEST_HARNESS="python $CGA_HOME/analysis_pipeline/scripts/firehose_test_harness.py"
|
||||
FIREHOSE_HOST=firehose
|
||||
FIREHOSE_PORT=8080
|
||||
FIREHOSE_DOMAIN=gsa
|
||||
FIREHOSE_WORKSPACE=trunk
|
||||
|
||||
# YAML file to write
|
||||
|
||||
PIPELINE_YAML_FILE=$ENTITY_SET_ID.yaml
|
||||
|
||||
# Annotations to pull down from Firehose
|
||||
|
||||
FIREHOSE_ANNOTATIONS=(reference_file dbsnp_file interval_list \
|
||||
sample_id recalibrated_bam_file squid_project collaborator_id)
|
||||
|
||||
# YAML templates
|
||||
|
||||
PROJECT_YAML_TEMPLATE='" \
|
||||
project: { \
|
||||
name: '"$ENTITY_SET_ID"', \
|
||||
referenceFile: %s, \
|
||||
dbsnpFile: %s, \
|
||||
intervalList: %s \
|
||||
},", $1, $2, $3'
|
||||
|
||||
SAMPLE_YAML_TEMPLATE='" \
|
||||
{ \
|
||||
id: %s, \
|
||||
bamFiles: { recalibrated: %s }, \
|
||||
tags: { \
|
||||
SQUIDProject: %s, \
|
||||
CollaboratorID: %s \
|
||||
} \
|
||||
}", $4, $5, $6, $7'
|
||||
|
||||
index=0
|
||||
count=${#FIREHOSE_ANNOTATIONS[@]}
|
||||
FIREHOSE_VARIABLES=""
|
||||
TAB=' '
|
||||
|
||||
# Build the tab separated list of firehose arguments
|
||||
|
||||
# Append one '${annotation}' placeholder per requested annotation, putting
# $TAB between consecutive placeholders (but not before the first one).
for annotation in "${FIREHOSE_ANNOTATIONS[@]}"; do
    if [ -n "$FIREHOSE_VARIABLES" ]; then
        FIREHOSE_VARIABLES=$FIREHOSE_VARIABLES$TAB
    fi
    FIREHOSE_VARIABLES=$FIREHOSE_VARIABLES'${'$annotation'}'
done
|
||||
|
||||
# Retrieve all the required variables and run the pipeline in Queue.
|
||||
$FIREHOSE_TEST_HARNESS \
|
||||
-d $FIREHOSE_DOMAIN -w $FIREHOSE_WORKSPACE \
|
||||
-t $ENTITY_TYPE -f $ENTITY_SET_ID -y $ENTITY_SET_TYPE \
|
||||
"echo '$FIREHOSE_VARIABLES'" && \
|
||||
\
|
||||
# Generate yaml from firehose output
|
||||
. firehose-populated-commands.sh | awk '
|
||||
BEGIN {
|
||||
printf "{"
|
||||
}
|
||||
{
|
||||
if (NR == 1) {
|
||||
printf '"$PROJECT_YAML_TEMPLATE"'
|
||||
printf "\n samples: ["
|
||||
} else {
|
||||
printf ","
|
||||
}
|
||||
printf '"$SAMPLE_YAML_TEMPLATE"'
|
||||
}
|
||||
END {
|
||||
if (NR > 0)
|
||||
printf "\n ]"
|
||||
print "\n}"
|
||||
}' > $PIPELINE_YAML_FILE
|
||||
Loading…
Reference in New Issue