Started the process to make the GATK engine into a runnable object so we can call it from other processes. Step 1: make a configuration object that can serialize to and from an XML file. This way we can store the information everyone uses shell scripts for. Also we can now pull the list of params out of the GenomeAnalysisTK.java. More to come...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@636 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-08 01:25:26 +00:00
parent 226edbdef6
commit bae4256574
2 changed files with 397 additions and 0 deletions

View File

@ -0,0 +1,272 @@
package org.broadinstitute.sting.utils.cmdLine;
import org.broadinstitute.sting.utils.StingException;
import org.simpleframework.xml.*;
import org.simpleframework.xml.core.Persister;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
/**
*
* User: aaron
* Date: May 7, 2009
* Time: 11:46:21 AM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date May 7, 2009
* <p/>
* Class ArgumentObject
* <p/>
* Encapsolute the massively large list of possible parameters we take in the Genome Analysis tool
*/
@Root
public class GATKArgumentCollection {
/* our version number */
private float versionNumber = 1;
private String description = "GATK Arguments";
/** the constructor */
public GATKArgumentCollection() {
}
// parameters and their defaults
@ElementList
@Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<File> samFiles = new ArrayList<File>();
@Element
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of reads to process before exiting", required = false)
public String maximumReads = "-1";
@Element
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation (LENIENT|SILENT|STRICT)", required = false)
public String strictnessLevel = "strict";
@Element
@Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null;
@Element
@Argument(fullName = "genome_region", shortName = "L", doc = "Genome region to operation on: from chr:start-end", required = false)
public String genomeRegion = null;
@Element
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
public String analysisName = null;
// parameters and their defaults
@ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true)
public Map<String, String> walkerArgs = new HashMap<String, String>();
@Element
@Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
public String DBSNPFile = null;
@Element
@Argument(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false)
public String HAPMAPFile = null;
@Element
@Argument(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
public String HAPMAPChipFile = null;
@Element
@Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false)
public Boolean EnabledThreadedIO = false;
@Element
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
public Boolean unsafe = false;
@Element
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
public String maximumReadSorts = null;
@Element
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
public String downsampleFraction = null;
@Element
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false)
public String downsampleCoverage = null;
@Element
@Argument(fullName = "intervals_file", shortName = "V", doc = "File containing list of genomic intervals to operate on. line := <contig> <start> <end>", required = false)
public String intervalsFile = null;
@Element
@Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false)
public Boolean walkAllLoci = false;
@Element
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
public Boolean disableThreading = false;
/** An output file presented to the walker. */
@Element
@Argument(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false)
public String outFileName = null;
/** An error output file presented to the walker. */
@Element
@Argument(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false)
public String errFileName = null;
/** A joint file for both 'normal' and error output presented to the walker. */
@Element
@Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
public String outErrFileName = null;
/** How many threads should be allocated to this analysis. */
@Element
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
public int numberOfThreads = 1;
@ElementList
@Argument(fullName = "rodBind", shortName = "B", doc = "", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>();
/**
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the file to write to
*/
public static void marshal(GATKArgumentCollection collection, String outputFile) {
Serializer serializer = new Persister();
File result = new File(outputFile);
try {
serializer.write(collection, result);
} catch (Exception e) {
throw new StingException("Failed to marshal the data from the file " + outputFile, e);
}
}
/**
* unmashall the object from a configuration file
*
* @param filename the filename to marshal from
*/
public static GATKArgumentCollection unmarshal(String filename) {
Serializer serializer = new Persister();
File source = new File(filename);
try {
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, source);
return example;
} catch (Exception e) {
throw new StingException("Failed to marshal the data to file " + filename, e);
}
}
/**
* test equality between two arg collections. This functions defines the statement:
* "not fun to write"
* @param other the other collection
* @return true if they're equal
*/
public boolean equals(GATKArgumentCollection other) {
if (other.samFiles.size() != samFiles.size()) {
return false;
}
for (int x = 0; x < samFiles.size(); x++) {
if (!samFiles.get(x).equals(other.samFiles.get(x))) {
return false;
}
}
if (other.walkerArgs.size() != walkerArgs.size()) {
return false;
}
for (String s : walkerArgs.keySet()) {
if (!other.walkerArgs.containsKey(s)) {
return false;
}
}
if (other.RODBindings.size() != RODBindings.size()) {
return false;
}
for (int x = 0; x < RODBindings.size(); x++) {
if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
return false;
}
}
if (!other.samFiles.equals(this.samFiles)) {
return false;
}
if (!other.maximumReads.equals(this.maximumReads)) {
return false;
}
if (!other.strictnessLevel.equals(this.strictnessLevel)) {
return false;
}
if (!other.referenceFile.equals(this.referenceFile)) {
return false;
}
if (!other.genomeRegion.equals(this.genomeRegion)) {
return false;
}
if (!other.analysisName.equals(this.analysisName)) {
return false;
}
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
return false;
}
if (!other.HAPMAPFile.equals(this.HAPMAPFile)) {
return false;
}
if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) {
return false;
}
if (!other.EnabledThreadedIO.equals(this.EnabledThreadedIO)) {
return false;
}
if (!other.unsafe.equals(this.unsafe)) {
return false;
}
if (!other.maximumReadSorts.equals(this.maximumReadSorts)) {
return false;
}
if (!other.downsampleFraction.equals(this.downsampleFraction)) {
return false;
}
if (!other.downsampleCoverage.equals(this.downsampleCoverage)) {
return false;
}
if (!other.intervalsFile.equals(this.intervalsFile)) {
return false;
}
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
return false;
}
if (!other.outFileName.equals(this.outFileName)) {
return false;
}
if (!other.errFileName.equals(this.errFileName)) {
return false;
}
if (!other.outErrFileName.equals(this.outErrFileName)) {
return false;
}
if (other.numberOfThreads != this.numberOfThreads) {
return false;
}
return true;
}
}

View File

@ -0,0 +1,125 @@
package org.broadinstitute.sting.utils.cmdLine;
import org.broadinstitute.sting.BaseTest;
import org.junit.After;
import static org.junit.Assert.fail;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
*
* User: aaron
* Date: May 7, 2009
* Time: 1:12:58 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
 * @author aaron
 * @version 1.0
 * @date May 7, 2009
 * <p/>
 * Class GATKArgumentCollectionTest
 * <p/>
 * Tests that a GATKArgumentCollection can be marshalled to an XML file and unmarshalled
 * back again without losing any of its argument values.
 */
public class GATKArgumentCollectionTest extends BaseTest {

    // our collection of arguments
    private GATKArgumentCollection collect;

    // where to write our xml file
    private final String xmlFileLoc = "testfile.xml";

    /** setup our test */
    @Before
    public void setup() {
        collect = new GATKArgumentCollection();
    }

    /** destroy the temp file */
    @After
    public void takedown() {
        File f = new File(xmlFileLoc);
        // if the file can't be removed right now, at least ask the JVM to remove it on exit
        if (f.exists() && !f.delete()) {
            f.deleteOnExit();
        }
    }

    /** fill every field of the collection under test with a known, non-null value */
    private void setupCollection() {
        // parameters and their defaults
        Map<String, String> wArgs = new HashMap<String, String>();
        wArgs.put("wArgType1", "Arg1");
        wArgs.put("wArgType2", "Arg2");
        wArgs.put("wArgType3", "Arg3");
        collect.walkerArgs = wArgs;

        List<File> input = new ArrayList<File>();
        input.add(new File("test.file"));
        collect.samFiles = input;
        collect.maximumReads = "-1";
        collect.strictnessLevel = "strict";
        collect.referenceFile = new File("referenceFile".toLowerCase());
        collect.genomeRegion = "genomeRegion".toLowerCase();
        collect.analysisName = "analysisName".toLowerCase();
        collect.DBSNPFile = "DBSNPFile".toLowerCase();
        collect.HAPMAPFile = "HAPMAPFile".toLowerCase();
        collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
        collect.EnabledThreadedIO = true;
        collect.unsafe = false;
        collect.maximumReadSorts = "maximumReadSorts".toLowerCase();
        collect.downsampleFraction = "downsampleFraction".toLowerCase();
        collect.downsampleCoverage = "downsampleCoverage".toLowerCase();
        collect.intervalsFile = "intervalsFile".toLowerCase();
        collect.walkAllLoci = true;
        collect.disableThreading = false;
        collect.outFileName = "outFileName".toLowerCase();
        collect.errFileName = "errFileName".toLowerCase();
        collect.outErrFileName = "outErrFileName".toLowerCase();
        collect.numberOfThreads = 1;

        // make some rod bindings up
        ArrayList<String> fakeBindings = new ArrayList<String>();
        fakeBindings.add("Bind1");
        fakeBindings.add("Bind2");
        fakeBindings.add("Bind3");
        collect.RODBindings = fakeBindings;
    }

    /** test the output of an XML file in the arg collection: a round trip must give an equal collection */
    @Test
    public void testOutput() {
        setupCollection();
        GATKArgumentCollection.marshal(collect, xmlFileLoc);
        GATKArgumentCollection collection = GATKArgumentCollection.unmarshal(xmlFileLoc);
        if (!collect.equals(collection)) {
            fail("Collections not equal");
        }
    }

    /** test the input of an XML file into the arg collection: the values read back must match what was written */
    @Test
    public void testInput() {
        setupCollection();
        GATKArgumentCollection.marshal(collect, xmlFileLoc);
        GATKArgumentCollection loaded = GATKArgumentCollection.unmarshal(xmlFileLoc);

        // check the collection typed fields directly, including the walker argument values
        if (!collect.walkerArgs.equals(loaded.walkerArgs)) {
            fail("Walker arguments were not read back correctly");
        }
        if (!collect.samFiles.equals(loaded.samFiles)) {
            fail("Input files were not read back correctly");
        }
        if (!collect.RODBindings.equals(loaded.RODBindings)) {
            fail("ROD bindings were not read back correctly");
        }
        if (!collect.disableThreading.equals(loaded.disableThreading)) {
            fail("The disable threading flag was not read back correctly");
        }
        if (collect.numberOfThreads != loaded.numberOfThreads) {
            fail("The number of threads was not read back correctly");
        }
    }
}