/* * Copyright (c) 2010 The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.gatk.arguments; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.simpleframework.xml.*; import org.simpleframework.xml.core.Persister; import org.simpleframework.xml.stream.Format; import org.simpleframework.xml.stream.HyphenStyle; import java.io.File; import java.io.InputStream; import java.io.PrintStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * @author aaron * @version 1.0 */ @Root public class GATKArgumentCollection { /* our version number */ private float versionNumber = 1; private String description = "GATK Arguments"; /** the constructor */ public GATKArgumentCollection() { } @ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true, required = false) public Map walkerArgs = new HashMap(); // parameters and their defaults @ElementList(required = false) @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) public List samFiles = new ArrayList(); @Element(required = false) @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) public Integer readBufferSize = null; @Element(required = false) @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false) public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.NO_ET; @ElementList(required = false) @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false) public List readFilters = new ArrayList(); @ElementList(required = false) @Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) public List intervals = null; @ElementList(required = false) @Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false) public List excludeIntervals = null; @Element(required = false) @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) public File referenceFile = null; @ElementList(required = false) @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form ,,", required = false) public ArrayList RODBindings = new ArrayList(); @Element(required = false) @Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false) public String RODToInterval = null; @Element(required = false) @Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false) public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION; @Element(required = false) @Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false) public String DBSNPFile = null; /** * The override mechanism in the GATK, by default, populates the command-line arguments, then * the defaults from the walker annotations. Unfortunately, walker annotations should be trumped * by a user explicitly specifying command-line arguments. * TODO: Change the GATK so that walker defaults are loaded first, then command-line arguments. */ private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000; @Element(required = false) @Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here.", required = false) public DownsampleType downsamplingType = null; @Element(required = false) @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false) public Double downsampleFraction = null; @Element(required = false) @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus (use max_reads_at_locus to stop the engine from reading in all reads)", required = false) public Integer downsampleCoverage = null; /** * Gets the downsampling method explicitly specified by the user. If the user didn't specify * a default downsampling mechanism, return null. * @return The explicitly specified downsampling mechanism, or null if none exists. */ public DownsamplingMethod getDownsamplingMethod() { if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null) return null; if(downsamplingType == null && downsampleCoverage != null) return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,downsampleCoverage,null); return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction); } /** * Gets the default downsampling method, returned if the user didn't specify any downsampling * method. * @return The default downsampling mechanism, or null if none exists. */ public DownsamplingMethod getDefaultDownsamplingMethod() { return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null); } @Element(required = false) @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false) public Boolean useOriginalBaseQualities = false; @Element(required = false) @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; @Element(required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; /** How many threads should be allocated to this analysis. */ @Element(required = false) @Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false) public int numberOfThreads = 1; /** What rule should we use when merging intervals */ @Element(required = false) @Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false) public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL; @ElementList(required = false) @Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching : or a .txt file containing the filter strings one per line.", required = false) public List readGroupBlackList = null; /** * marshal the data out to a object * * @param collection the GATKArgumentCollection to load into * @param outputFile the file to write to */ public static void marshal(GATKArgumentCollection collection, String outputFile) { Serializer serializer = new Persister(new Format(new HyphenStyle())); File result = new File(outputFile); try { serializer.write(collection, result); } catch (Exception e) { throw new StingException("Failed to marshal the data to the file " + outputFile, e); } } /** * marshal the data out to a object * * @param collection the GATKArgumentCollection to load into * @param outputFile the stream to write to */ public static void marshal(GATKArgumentCollection collection, PrintStream outputFile) { Serializer serializer = new Persister(new Format(new HyphenStyle())); try { serializer.write(collection, outputFile); } catch (Exception e) { throw new StingException("Failed to marshal the data to the file " + outputFile, e); } } /** * unmashall the object from a configuration file * * @param filename the filename to marshal from */ public static GATKArgumentCollection unmarshal(String filename) { Serializer serializer = new Persister(new Format(new HyphenStyle())); File source = new File(filename); try { GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, source); return example; } catch (Exception e) { throw new StingException("Failed to marshal the data from file " + filename, e); } } /** * unmashall the object from a configuration file * * @param file the inputstream to marshal from */ public static GATKArgumentCollection unmarshal(InputStream file) { Serializer serializer = new Persister(new Format(new HyphenStyle())); try { GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, file); return example; } catch (Exception e) { throw new StingException("Failed to marshal the data from file " + file.toString(), e); } } /** * test equality between two arg collections. This function defines the statement: * "not fun to write" * * @param other the other collection * * @return true if they're equal */ public boolean equals(GATKArgumentCollection other) { if (other == null) return false; if (other.samFiles.size() != samFiles.size()) { return false; } for (int x = 0; x < samFiles.size(); x++) { if (!samFiles.get(x).equals(other.samFiles.get(x))) { return false; } } if (other.walkerArgs.size() != walkerArgs.size()) { return false; } for (String s : walkerArgs.keySet()) { if (!other.walkerArgs.containsKey(s)) { return false; } } if (other.RODBindings.size() != RODBindings.size()) { return false; } for (int x = 0; x < RODBindings.size(); x++) { if (!RODBindings.get(x).equals(other.RODBindings.get(x))) { return false; } } if (!other.samFiles.equals(this.samFiles)) { return false; } if(other.readBufferSize == null || this.readBufferSize == null) { // If either is null, return false if they're both null, otherwise keep going... if(other.readBufferSize != null || this.readBufferSize != null) return false; } else { if(!other.readBufferSize.equals(this.readBufferSize)) return false; } if (!(other.readBufferSize == null && this.readBufferSize == null) && (other.readBufferSize == null || this.readBufferSize == null)) { return false; } if (!other.strictnessLevel.equals(this.strictnessLevel)) { return false; } if (!other.referenceFile.equals(this.referenceFile)) { return false; } if (!other.intervals.equals(this.intervals)) { return false; } if (!other.excludeIntervals.equals(this.excludeIntervals)) { return false; } if (!other.DBSNPFile.equals(this.DBSNPFile)) { return false; } if (!other.unsafe.equals(this.unsafe)) { return false; } if ((other.downsampleFraction == null && this.downsampleFraction != null) || (other.downsampleFraction != null && !other.downsampleFraction.equals(this.downsampleFraction))) { return false; } if ((other.downsampleCoverage == null && this.downsampleCoverage != null) || (other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage))) { return false; } if (other.numberOfThreads != this.numberOfThreads) { return false; } if (other.intervalMerging != this.intervalMerging) { return false; } if ((other.RODToInterval == null && RODToInterval != null) || (other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) { return false; } if (other.phoneHomeType != this.phoneHomeType) { return false; } if (BTIMergeRule != other.BTIMergeRule) return false; // if (other.enableRodWalkers != this.enableRodWalkers) { // return false; // } return true; } }