Started the process to make the GATK engine into a runnable object so we can call it from other processes. Step 1: make a configuration object that can serialize to and from an XML file. This way we can store the information everyone uses shell scripts for. Also we can now pull the list of params out of the GenomeAnalysisTK.java. More to come...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@636 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
226edbdef6
commit
bae4256574
|
|
@ -0,0 +1,272 @@
|
|||
package org.broadinstitute.sting.utils.cmdLine;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.simpleframework.xml.*;
|
||||
import org.simpleframework.xml.core.Persister;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: May 7, 2009
|
||||
* Time: 11:46:21 AM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date May 7, 2009
|
||||
* <p/>
|
||||
* Class GATKArgumentCollection
|
||||
* <p/>
|
||||
* Encapsulates the very large list of possible parameters accepted by the Genome Analysis tool
|
||||
*/
|
||||
@Root
|
||||
public class GATKArgumentCollection {
|
||||
|
||||
/* our version number */
|
||||
private float versionNumber = 1;
|
||||
private String description = "GATK Arguments";
|
||||
|
||||
/** the constructor */
|
||||
public GATKArgumentCollection() {
|
||||
}
|
||||
|
||||
// parameters and their defaults
|
||||
@ElementList
|
||||
@Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
|
||||
public List<File> samFiles = new ArrayList<File>();
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of reads to process before exiting", required = false)
|
||||
public String maximumReads = "-1";
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation (LENIENT|SILENT|STRICT)", required = false)
|
||||
public String strictnessLevel = "strict";
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||
public File referenceFile = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "genome_region", shortName = "L", doc = "Genome region to operation on: from chr:start-end", required = false)
|
||||
public String genomeRegion = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
|
||||
public String analysisName = null;
|
||||
|
||||
// parameters and their defaults
|
||||
@ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true)
|
||||
public Map<String, String> walkerArgs = new HashMap<String, String>();
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
|
||||
public String DBSNPFile = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false)
|
||||
public String HAPMAPFile = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
|
||||
public String HAPMAPChipFile = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false)
|
||||
public Boolean EnabledThreadedIO = false;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
|
||||
public Boolean unsafe = false;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
|
||||
public String maximumReadSorts = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
|
||||
public String downsampleFraction = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false)
|
||||
public String downsampleCoverage = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "intervals_file", shortName = "V", doc = "File containing list of genomic intervals to operate on. line := <contig> <start> <end>", required = false)
|
||||
public String intervalsFile = null;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false)
|
||||
public Boolean walkAllLoci = false;
|
||||
|
||||
@Element
|
||||
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
||||
public Boolean disableThreading = false;
|
||||
|
||||
/** An output file presented to the walker. */
|
||||
@Element
|
||||
@Argument(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false)
|
||||
public String outFileName = null;
|
||||
|
||||
/** An error output file presented to the walker. */
|
||||
@Element
|
||||
@Argument(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false)
|
||||
public String errFileName = null;
|
||||
|
||||
/** A joint file for both 'normal' and error output presented to the walker. */
|
||||
@Element
|
||||
@Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
|
||||
public String outErrFileName = null;
|
||||
|
||||
/** How many threads should be allocated to this analysis. */
|
||||
@Element
|
||||
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
public int numberOfThreads = 1;
|
||||
|
||||
@ElementList
|
||||
@Argument(fullName = "rodBind", shortName = "B", doc = "", required = false)
|
||||
public ArrayList<String> RODBindings = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* marshal the data out to a object
|
||||
*
|
||||
* @param collection the GATKArgumentCollection to load into
|
||||
* @param outputFile the file to write to
|
||||
*/
|
||||
public static void marshal(GATKArgumentCollection collection, String outputFile) {
|
||||
Serializer serializer = new Persister();
|
||||
File result = new File(outputFile);
|
||||
try {
|
||||
serializer.write(collection, result);
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Failed to marshal the data from the file " + outputFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* unmashall the object from a configuration file
|
||||
*
|
||||
* @param filename the filename to marshal from
|
||||
*/
|
||||
public static GATKArgumentCollection unmarshal(String filename) {
|
||||
Serializer serializer = new Persister();
|
||||
File source = new File(filename);
|
||||
try {
|
||||
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, source);
|
||||
return example;
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Failed to marshal the data to file " + filename, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* test equality between two arg collections. This functions defines the statement:
|
||||
* "not fun to write"
|
||||
* @param other the other collection
|
||||
* @return true if they're equal
|
||||
*/
|
||||
public boolean equals(GATKArgumentCollection other) {
|
||||
if (other.samFiles.size() != samFiles.size()) {
|
||||
return false;
|
||||
}
|
||||
for (int x = 0; x < samFiles.size(); x++) {
|
||||
if (!samFiles.get(x).equals(other.samFiles.get(x))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (other.walkerArgs.size() != walkerArgs.size()) {
|
||||
return false;
|
||||
}
|
||||
for (String s : walkerArgs.keySet()) {
|
||||
if (!other.walkerArgs.containsKey(s)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (other.RODBindings.size() != RODBindings.size()) {
|
||||
return false;
|
||||
}
|
||||
for (int x = 0; x < RODBindings.size(); x++) {
|
||||
if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!other.samFiles.equals(this.samFiles)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.maximumReads.equals(this.maximumReads)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.strictnessLevel.equals(this.strictnessLevel)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.referenceFile.equals(this.referenceFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.genomeRegion.equals(this.genomeRegion)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.analysisName.equals(this.analysisName)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.HAPMAPFile.equals(this.HAPMAPFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.EnabledThreadedIO.equals(this.EnabledThreadedIO)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.unsafe.equals(this.unsafe)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.maximumReadSorts.equals(this.maximumReadSorts)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.downsampleFraction.equals(this.downsampleFraction)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.downsampleCoverage.equals(this.downsampleCoverage)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.intervalsFile.equals(this.intervalsFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.outFileName.equals(this.outFileName)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.errFileName.equals(this.errFileName)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.outErrFileName.equals(this.outErrFileName)) {
|
||||
return false;
|
||||
}
|
||||
if (other.numberOfThreads != this.numberOfThreads) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
package org.broadinstitute.sting.utils.cmdLine;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.After;
|
||||
import static org.junit.Assert.fail;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: May 7, 2009
|
||||
* Time: 1:12:58 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date May 7, 2009
|
||||
* <p/>
|
||||
* Class GATKArgumentCollectionTest
|
||||
* <p/>
|
||||
* Tests that a GATKArgumentCollection can be marshalled to an XML file and unmarshalled back again.
|
||||
*/
|
||||
public class GATKArgumentCollectionTest extends BaseTest {
|
||||
|
||||
// our collection of arguments
|
||||
private GATKArgumentCollection collect;
|
||||
|
||||
// where to write our xml file
|
||||
private String xmlFileLoc = "testfile.xml";
|
||||
|
||||
/** setup our test */
|
||||
@Before
|
||||
public void setup() {
|
||||
collect = new GATKArgumentCollection();
|
||||
}
|
||||
|
||||
/** destroy the temp file */
|
||||
@After
|
||||
public void takedown() {
|
||||
File f = new File(xmlFileLoc);
|
||||
if (f.exists()) {
|
||||
f.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private void setupCollection() {
|
||||
// parameters and their defaults
|
||||
Map<String, String> wArgs = new HashMap<String, String>();
|
||||
wArgs.put("wArgType1", "Arg1");
|
||||
wArgs.put("wArgType2", "Arg2");
|
||||
wArgs.put("wArgType3", "Arg3");
|
||||
collect.walkerArgs = wArgs;
|
||||
|
||||
List<File> input = new ArrayList<File>();
|
||||
input.add(new File("test.file"));
|
||||
collect.samFiles = input;
|
||||
collect.maximumReads = "-1";
|
||||
collect.strictnessLevel = "strict";
|
||||
collect.referenceFile = new File("referenceFile".toLowerCase());
|
||||
collect.genomeRegion = "genomeRegion".toLowerCase();
|
||||
collect.analysisName = "analysisName".toLowerCase();
|
||||
collect.DBSNPFile = "DBSNPFile".toLowerCase();
|
||||
collect.HAPMAPFile = "HAPMAPFile".toLowerCase();
|
||||
collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
|
||||
collect.EnabledThreadedIO = true;
|
||||
collect.unsafe = false;
|
||||
collect.maximumReadSorts = "maximumReadSorts".toLowerCase();
|
||||
collect.downsampleFraction = "downsampleFraction".toLowerCase();
|
||||
collect.downsampleCoverage = "downsampleCoverage".toLowerCase();
|
||||
collect.intervalsFile = "intervalsFile".toLowerCase();
|
||||
collect.walkAllLoci = true;
|
||||
collect.disableThreading = false;
|
||||
collect.outFileName = "outFileName".toLowerCase();
|
||||
collect.errFileName = "errFileName".toLowerCase();
|
||||
collect.outErrFileName = "outErrFileName".toLowerCase();
|
||||
collect.numberOfThreads = 1;
|
||||
|
||||
// make some rod bindings up
|
||||
ArrayList<String> fakeBindings = new ArrayList<String>();
|
||||
fakeBindings.add("Bind1");
|
||||
fakeBindings.add("Bind2");
|
||||
fakeBindings.add("Bind3");
|
||||
|
||||
collect.RODBindings = fakeBindings;
|
||||
}
|
||||
|
||||
|
||||
/** test the output of an XML file in the arg collection */
|
||||
@Test
|
||||
public void testOutput() {
|
||||
setupCollection();
|
||||
|
||||
GATKArgumentCollection.marshal(collect, xmlFileLoc);
|
||||
GATKArgumentCollection collection = GATKArgumentCollection.unmarshal(xmlFileLoc);
|
||||
if (!collect.equals(collection)) {
|
||||
fail("Collections not equal");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** test the output of an XML file in the arg collection */
|
||||
@Test
|
||||
public void testInput() {
|
||||
setupCollection();
|
||||
GATKArgumentCollection.marshal(collect, xmlFileLoc);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue