/*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
2010-01-15 08:14:35 +08:00
package org.broadinstitute.sting.gatk.arguments ;
2009-05-11 10:07:20 +08:00
2009-08-29 00:03:46 +08:00
import net.sf.samtools.SAMFileReader ;
2010-08-29 06:53:32 +08:00
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport ;
2010-09-12 23:07:38 +08:00
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException ;
2010-04-13 23:50:38 +08:00
import org.broadinstitute.sting.utils.interval.IntervalMergingRule ;
2010-04-20 07:00:08 +08:00
import org.broadinstitute.sting.commandline.Argument ;
2010-08-10 00:42:48 +08:00
import org.broadinstitute.sting.commandline.Input ;
2010-12-08 09:01:39 +08:00
import org.broadinstitute.sting.commandline.Hidden ;
2010-05-19 13:40:05 +08:00
import org.broadinstitute.sting.gatk.DownsampleType ;
2010-08-27 05:38:03 +08:00
import org.broadinstitute.sting.gatk.DownsamplingMethod ;
2010-08-09 22:00:52 +08:00
import org.broadinstitute.sting.utils.interval.IntervalSetRule ;
2010-12-07 01:43:43 +08:00
import org.broadinstitute.sting.utils.baq.BAQ ;
2009-05-11 10:07:20 +08:00
import org.simpleframework.xml.* ;
import org.simpleframework.xml.core.Persister ;
import org.simpleframework.xml.stream.Format ;
import org.simpleframework.xml.stream.HyphenStyle ;
import java.io.File ;
2009-06-11 04:46:16 +08:00
import java.io.InputStream ;
2009-10-29 03:56:05 +08:00
import java.io.PrintStream ;
2009-05-11 10:07:20 +08:00
import java.util.ArrayList ;
import java.util.HashMap ;
import java.util.List ;
import java.util.Map ;
/ * *
* @author aaron
* @version 1.0
* /
@Root
public class GATKArgumentCollection {
/* our version number */
private float versionNumber = 1 ;
private String description = "GATK Arguments" ;
/** the constructor */
public GATKArgumentCollection ( ) {
}
2009-06-11 04:46:16 +08:00
@ElementMap ( entry = "analysis_argument" , key = "key" , attribute = true , inline = true , required = false )
public Map < String , String > walkerArgs = new HashMap < String , String > ( ) ;
2009-05-21 03:01:25 +08:00
2009-05-11 10:07:20 +08:00
// parameters and their defaults
2009-06-11 04:46:16 +08:00
@ElementList ( required = false )
2010-08-10 00:42:48 +08:00
@Input ( fullName = "input_file" , shortName = "I" , doc = "SAM or BAM file(s)" , required = false )
2009-05-11 10:07:20 +08:00
public List < File > samFiles = new ArrayList < File > ( ) ;
2010-09-15 19:50:22 +08:00
// parameters and their defaults
@ElementList ( required = false )
@Argument ( fullName = "sample_metadata" , shortName = "SM" , doc = "Sample file(s) in JSON format" , required = false )
public List < File > sampleFiles = new ArrayList < File > ( ) ;
2010-07-02 06:28:55 +08:00
@Element ( required = false )
@Argument ( fullName = "read_buffer_size" , shortName = "rbs" , doc = "Number of reads per SAM file to buffer in memory" , required = false )
public Integer readBufferSize = null ;
2010-08-29 06:53:32 +08:00
@Element ( required = false )
@Argument ( fullName = "phone_home" , shortName = "et" , doc = "What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository" , required = false )
2010-09-12 22:34:15 +08:00
public GATKRunReport . PhoneHomeOption phoneHomeType = GATKRunReport . PhoneHomeOption . STANDARD ;
2010-08-29 06:53:32 +08:00
2009-11-11 02:40:16 +08:00
@ElementList ( required = false )
2009-12-24 05:59:14 +08:00
@Argument ( fullName = "read_filter" , shortName = "rf" , doc = "Specify filtration criteria to apply to each read individually." , required = false )
2009-11-11 02:40:16 +08:00
public List < String > readFilters = new ArrayList < String > ( ) ;
2009-06-26 04:44:23 +08:00
@ElementList ( required = false )
2010-08-10 00:42:48 +08:00
@Input ( fullName = "intervals" , shortName = "L" , doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file." , required = false )
2009-06-26 04:44:23 +08:00
public List < String > intervals = null ;
2009-05-11 10:07:20 +08:00
2010-03-13 03:23:12 +08:00
@ElementList ( required = false )
2010-08-10 00:42:48 +08:00
@Input ( fullName = "excludeIntervals" , shortName = "XL" , doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file." , required = false )
2010-03-13 03:23:12 +08:00
public List < String > excludeIntervals = null ;
2009-06-11 04:46:16 +08:00
@Element ( required = false )
2010-08-10 00:42:48 +08:00
@Input ( fullName = "reference_sequence" , shortName = "R" , doc = "Reference sequence file" , required = false )
2009-05-11 10:07:20 +08:00
public File referenceFile = null ;
2009-06-11 04:46:16 +08:00
@ElementList ( required = false )
2010-12-02 01:36:47 +08:00
@Input ( fullName = "rodBind" , shortName = "B" , doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>" , required = false )
2009-05-21 03:01:25 +08:00
public ArrayList < String > RODBindings = new ArrayList < String > ( ) ;
2009-05-11 10:07:20 +08:00
2010-04-08 13:14:41 +08:00
@Element ( required = false )
@Argument ( fullName = "rodToIntervalTrackName" , shortName = "BTI" , doc = "Indicates that the named track should be converted into an interval list, to drive the traversal" , required = false )
2010-08-09 22:00:52 +08:00
public String RODToInterval = null ;
@Element ( required = false )
@Argument ( fullName = "BTI_merge_rule" , shortName = "BTIMR" , doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options" , required = false )
public IntervalSetRule BTIMergeRule = IntervalSetRule . UNION ;
2010-04-08 13:14:41 +08:00
2009-06-11 04:46:16 +08:00
@Element ( required = false )
2010-08-10 00:42:48 +08:00
@Input ( fullName = "DBSNP" , shortName = "D" , doc = "DBSNP file" , required = false )
2009-05-11 10:07:20 +08:00
public String DBSNPFile = null ;
2010-08-27 05:38:03 +08:00
/ * *
* The override mechanism in the GATK , by default , populates the command - line arguments , then
* the defaults from the walker annotations . Unfortunately , walker annotations should be trumped
* by a user explicitly specifying command - line arguments .
* TODO : Change the GATK so that walker defaults are loaded first , then command - line arguments .
* /
private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType . BY_SAMPLE ;
private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000 ;
2009-05-29 22:51:08 +08:00
2010-05-19 13:40:05 +08:00
@Element ( required = false )
@Argument ( fullName = "downsampling_type" , shortName = "dt" , doc = "Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here." , required = false )
2010-08-27 05:38:03 +08:00
public DownsampleType downsamplingType = null ;
2010-05-19 13:40:05 +08:00
2009-06-11 04:46:16 +08:00
@Element ( required = false )
2009-05-11 10:07:20 +08:00
@Argument ( fullName = "downsample_to_fraction" , shortName = "dfrac" , doc = "Fraction [0.0-1.0] of reads to downsample to" , required = false )
2009-05-16 05:02:12 +08:00
public Double downsampleFraction = null ;
2009-05-11 10:07:20 +08:00
2009-06-11 04:46:16 +08:00
@Element ( required = false )
2010-10-05 01:27:01 +08:00
@Argument ( fullName = "downsample_to_coverage" , shortName = "dcov" , doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus" , required = false )
2009-06-11 04:46:16 +08:00
public Integer downsampleCoverage = null ;
2009-05-11 10:07:20 +08:00
2010-08-27 05:38:03 +08:00
/ * *
* Gets the downsampling method explicitly specified by the user . If the user didn ' t specify
* a default downsampling mechanism , return null .
* @return The explicitly specified downsampling mechanism , or null if none exists .
* /
public DownsamplingMethod getDownsamplingMethod ( ) {
if ( downsamplingType = = null & & downsampleFraction = = null & & downsampleCoverage = = null )
return null ;
2010-08-28 01:35:41 +08:00
if ( downsamplingType = = null & & downsampleCoverage ! = null )
return new DownsamplingMethod ( DEFAULT_DOWNSAMPLING_TYPE , downsampleCoverage , null ) ;
2010-08-27 05:38:03 +08:00
return new DownsamplingMethod ( downsamplingType , downsampleCoverage , downsampleFraction ) ;
}
2010-12-05 04:23:06 +08:00
@Element ( required = false )
@Argument ( fullName = "baq" , shortName = "baq" , doc = "Type of BAQ calculation to apply in the engine" , required = false )
BAQ calculation refactoring in the GATK. Single -baq argument can be NONE, CALCULATE_AS_NECESSARY, and RECALCULATE. Walkers can control bia the @BAQMode annotation how the BAQ calculation is applied. Can either be as a tag, by overwriting the qualities scores, or by only returning the baq-capped qualities scores. Additionally, walkers can be set up to have the BAQ applied to the incoming reads (ON_INPUT, the default), to output reads (ON_OUTPUT), or HANDLED_BY_WALKER, which means that calling into the BAQ system is the responsibility of the individual walker.
SAMFileWriterStub now supports BAQ writing as an internal feature. Several walkers have the @BAQMode applied to this, with parameters that I think are reasonable. Please look if you own these walkers, though
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4798 348d0f76-0448-11de-a6fe-93d51630548a
2010-12-07 04:55:52 +08:00
public BAQ . CalculationMode BAQMode = BAQ . CalculationMode . NONE ;
2010-12-05 04:23:06 +08:00
2010-12-08 09:01:39 +08:00
@Element ( required = false )
@Hidden
@Argument ( fullName = "baqGapOpenPenalty" , shortName = "baqGOP" , doc = "Gap open penalty. For testing purposes only" , required = false )
public double BAQGOP = 1e-3 ; // todo remove me
2010-08-27 05:38:03 +08:00
/ * *
* Gets the default downsampling method , returned if the user didn ' t specify any downsampling
* method .
* @return The default downsampling mechanism , or null if none exists .
* /
public DownsamplingMethod getDefaultDownsamplingMethod ( ) {
return new DownsamplingMethod ( DEFAULT_DOWNSAMPLING_TYPE , DEFAULT_DOWNSAMPLING_COVERAGE , null ) ;
}
2009-06-11 04:46:16 +08:00
@Element ( required = false )
2010-03-16 10:06:11 +08:00
@Argument ( fullName = "useOriginalQualities" , shortName = "OQ" , doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores" , required = false )
public Boolean useOriginalBaseQualities = false ;
@Element ( required = false )
2010-01-21 05:36:46 +08:00
@Argument ( fullName = "validation_strictness" , shortName = "S" , doc = "How strict should we be with validation" , required = false )
2009-07-09 04:26:16 +08:00
public SAMFileReader . ValidationStringency strictnessLevel = SAMFileReader . ValidationStringency . SILENT ;
2009-05-11 10:07:20 +08:00
2009-06-11 04:46:16 +08:00
@Element ( required = false )
2010-09-03 11:18:07 +08:00
@Argument ( fullName = "unsafe" , shortName = "U" , doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument." , required = false )
2010-01-15 08:14:35 +08:00
public ValidationExclusion . TYPE unsafe ;
2009-05-11 10:07:20 +08:00
/** How many threads should be allocated to this analysis. */
2009-06-11 04:46:16 +08:00
@Element ( required = false )
2010-05-19 13:40:05 +08:00
@Argument ( fullName = "num_threads" , shortName = "nt" , doc = "How many threads should be allocated to running this analysis." , required = false )
2009-05-11 10:07:20 +08:00
public int numberOfThreads = 1 ;
2009-12-24 05:59:14 +08:00
/** What rule should we use when merging intervals */
@Element ( required = false )
2010-01-21 05:36:46 +08:00
@Argument ( fullName = "interval_merging" , shortName = "im" , doc = "What interval merging rule should we use." , required = false )
2010-01-15 08:14:35 +08:00
public IntervalMergingRule intervalMerging = IntervalMergingRule . ALL ;
2009-12-24 05:59:14 +08:00
2010-03-30 23:34:04 +08:00
@ElementList ( required = false )
2010-08-14 07:52:24 +08:00
@Input ( fullName = "read_group_black_list" , shortName = "rgbl" , doc = "Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line." , required = false )
2010-03-27 03:38:57 +08:00
public List < String > readGroupBlackList = null ;
2009-05-11 10:07:20 +08:00
/ * *
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the file to write to
* /
2009-12-24 05:59:14 +08:00
public static void marshal ( GATKArgumentCollection collection , String outputFile ) {
2009-05-11 10:07:20 +08:00
Serializer serializer = new Persister ( new Format ( new HyphenStyle ( ) ) ) ;
File result = new File ( outputFile ) ;
try {
serializer . write ( collection , result ) ;
} catch ( Exception e ) {
2010-09-12 23:07:38 +08:00
throw new ReviewedStingException ( "Failed to marshal the data to the file " + outputFile , e ) ;
2009-10-29 03:56:05 +08:00
}
}
/ * *
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the stream to write to
* /
2009-12-24 05:59:14 +08:00
public static void marshal ( GATKArgumentCollection collection , PrintStream outputFile ) {
2009-10-29 03:56:05 +08:00
Serializer serializer = new Persister ( new Format ( new HyphenStyle ( ) ) ) ;
try {
serializer . write ( collection , outputFile ) ;
} catch ( Exception e ) {
2010-09-12 23:07:38 +08:00
throw new ReviewedStingException ( "Failed to marshal the data to the file " + outputFile , e ) ;
2009-05-11 10:07:20 +08:00
}
}
/ * *
* unmashall the object from a configuration file
*
* @param filename the filename to marshal from
* /
2009-12-24 05:59:14 +08:00
public static GATKArgumentCollection unmarshal ( String filename ) {
2009-05-11 10:07:20 +08:00
Serializer serializer = new Persister ( new Format ( new HyphenStyle ( ) ) ) ;
File source = new File ( filename ) ;
try {
GATKArgumentCollection example = serializer . read ( GATKArgumentCollection . class , source ) ;
return example ;
} catch ( Exception e ) {
2010-09-12 23:07:38 +08:00
throw new ReviewedStingException ( "Failed to marshal the data from file " + filename , e ) ;
2009-05-11 10:07:20 +08:00
}
}
2009-06-11 04:46:16 +08:00
/ * *
* unmashall the object from a configuration file
*
* @param file the inputstream to marshal from
* /
2009-12-24 05:59:14 +08:00
public static GATKArgumentCollection unmarshal ( InputStream file ) {
2009-06-11 04:46:16 +08:00
Serializer serializer = new Persister ( new Format ( new HyphenStyle ( ) ) ) ;
try {
GATKArgumentCollection example = serializer . read ( GATKArgumentCollection . class , file ) ;
return example ;
} catch ( Exception e ) {
2010-09-12 23:07:38 +08:00
throw new ReviewedStingException ( "Failed to marshal the data from file " + file . toString ( ) , e ) ;
2009-06-11 04:46:16 +08:00
}
}
2009-05-11 10:07:20 +08:00
/ * *
* test equality between two arg collections . This function defines the statement :
* "not fun to write"
2009-06-11 04:46:16 +08:00
*
2009-05-11 10:07:20 +08:00
* @param other the other collection
2009-06-11 04:46:16 +08:00
*
2009-05-11 10:07:20 +08:00
* @return true if they ' re equal
* /
2009-12-24 05:59:14 +08:00
public boolean equals ( GATKArgumentCollection other ) {
2010-04-08 13:14:41 +08:00
if ( other = = null ) return false ;
2009-05-11 10:07:20 +08:00
if ( other . samFiles . size ( ) ! = samFiles . size ( ) ) {
return false ;
}
for ( int x = 0 ; x < samFiles . size ( ) ; x + + ) {
if ( ! samFiles . get ( x ) . equals ( other . samFiles . get ( x ) ) ) {
return false ;
}
}
if ( other . walkerArgs . size ( ) ! = walkerArgs . size ( ) ) {
return false ;
}
for ( String s : walkerArgs . keySet ( ) ) {
if ( ! other . walkerArgs . containsKey ( s ) ) {
return false ;
}
}
if ( other . RODBindings . size ( ) ! = RODBindings . size ( ) ) {
return false ;
}
for ( int x = 0 ; x < RODBindings . size ( ) ; x + + ) {
if ( ! RODBindings . get ( x ) . equals ( other . RODBindings . get ( x ) ) ) {
return false ;
}
}
if ( ! other . samFiles . equals ( this . samFiles ) ) {
return false ;
}
2010-07-02 06:28:55 +08:00
if ( other . readBufferSize = = null | | this . readBufferSize = = null ) {
// If either is null, return false if they're both null, otherwise keep going...
if ( other . readBufferSize ! = null | | this . readBufferSize ! = null )
return false ;
}
else {
if ( ! other . readBufferSize . equals ( this . readBufferSize ) )
return false ;
}
if ( ! ( other . readBufferSize = = null & & this . readBufferSize = = null ) & & ( other . readBufferSize = = null | | this . readBufferSize = = null ) ) {
return false ;
}
2009-05-11 10:07:20 +08:00
if ( ! other . strictnessLevel . equals ( this . strictnessLevel ) ) {
return false ;
}
if ( ! other . referenceFile . equals ( this . referenceFile ) ) {
return false ;
}
2009-05-12 09:04:18 +08:00
if ( ! other . intervals . equals ( this . intervals ) ) {
2009-05-11 10:07:20 +08:00
return false ;
}
2010-03-13 03:23:12 +08:00
if ( ! other . excludeIntervals . equals ( this . excludeIntervals ) ) {
return false ;
}
2009-05-11 10:07:20 +08:00
if ( ! other . DBSNPFile . equals ( this . DBSNPFile ) ) {
return false ;
}
if ( ! other . unsafe . equals ( this . unsafe ) ) {
return false ;
}
2009-12-24 05:59:14 +08:00
if ( ( other . downsampleFraction = = null & & this . downsampleFraction ! = null ) | |
( other . downsampleFraction ! = null & & ! other . downsampleFraction . equals ( this . downsampleFraction ) ) ) {
2009-05-11 10:07:20 +08:00
return false ;
}
2009-12-24 05:59:14 +08:00
if ( ( other . downsampleCoverage = = null & & this . downsampleCoverage ! = null ) | |
( other . downsampleCoverage ! = null & & ! other . downsampleCoverage . equals ( this . downsampleCoverage ) ) ) {
2009-05-11 10:07:20 +08:00
return false ;
}
if ( other . numberOfThreads ! = this . numberOfThreads ) {
return false ;
}
2009-12-24 05:59:14 +08:00
if ( other . intervalMerging ! = this . intervalMerging ) {
return false ;
}
2010-04-08 13:14:41 +08:00
if ( ( other . RODToInterval = = null & & RODToInterval ! = null ) | |
( other . RODToInterval ! = null & & ! other . RODToInterval . equals ( RODToInterval ) ) ) {
return false ;
}
2010-08-29 06:53:32 +08:00
if ( other . phoneHomeType ! = this . phoneHomeType ) {
return false ;
}
2010-08-09 22:00:52 +08:00
if ( BTIMergeRule ! = other . BTIMergeRule )
2010-12-05 04:23:06 +08:00
return false ;
if ( BAQMode ! = other . BAQMode )
return false ;
2009-09-03 11:06:25 +08:00
2009-05-11 10:07:20 +08:00
return true ;
}
2009-12-24 05:59:14 +08:00
2009-05-11 10:07:20 +08:00
}
2009-12-24 05:59:14 +08:00