From db9570ae291cc9c9b3489253ca8e11009e778433 Mon Sep 17 00:00:00 2001 From: aaron Date: Fri, 15 Jan 2010 00:14:35 +0000 Subject: [PATCH] Looks bigger than it is: * Moved GATKArgumentCollection into gatk.arguments folder to clean up the main folder, also added some associated argument classes (most of the changes). * Added code to the argument parsing system for default enums, we needed this so we could preserve the current unsafe flag, and at the same time allow finer-grained control of unsafe operations. You can now specify: "-U" (for all unsafe operations), "-U ALLOW_UNINDEXED_BAM" (only allow unindexed BAMs), "-U NO_READ_ORDER_VERIFICATION", etc. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2586 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/CommandLineExecutable.java | 1 + .../sting/gatk/CommandLineGATK.java | 1 + .../sting/gatk/GenomeAnalysisEngine.java | 17 ++++-- .../org/broadinstitute/sting/gatk/Reads.java | 16 +++--- .../GATKArgumentCollection.java | 21 ++----- .../gatk/arguments/IntervalMergingRule.java | 17 ++++++ .../gatk/arguments/ValidationExclusion.java | 55 +++++++++++++++++++ .../simpleDataSources/SAMDataSource.java | 16 +++--- .../iterators/MergingSamRecordIterator2.java | 3 +- .../CalculatePhaseLikelihoodsWalker.java | 2 +- .../walkers/variantstovcf/VariantsToVCF.java | 2 +- .../sting/utils/GATKErrorReport.java | 2 +- .../sting/utils/GenomeLocParser.java | 21 ++++--- .../sting/utils/bed/BedParser.java | 4 +- .../utils/cmdLine/ArgumentTypeDescriptor.java | 9 ++- .../cmdLine/EnumerationArgumentDefault.java | 40 ++++++++++++++ .../GATKArgumentCollectionTest.java | 5 +- .../sting/utils/GenomeLocParserTest.java | 14 ++--- .../sting/utils/bed/BedParserTest.java | 5 +- 19 files changed, 185 insertions(+), 66 deletions(-) rename java/src/org/broadinstitute/sting/gatk/{ => arguments}/GATKArgumentCollection.java (93%) create mode 100644 java/src/org/broadinstitute/sting/gatk/arguments/IntervalMergingRule.java create mode 
100644 java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java create mode 100644 java/src/org/broadinstitute/sting/utils/cmdLine/EnumerationArgumentDefault.java rename java/test/org/broadinstitute/sting/gatk/{ => arguments}/GATKArgumentCollectionTest.java (95%) diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 782ededa0..19de5dd48 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; import org.broadinstitute.sting.utils.cmdLine.ArgumentTypeDescriptor; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 79f114955..6256bb02b 100755 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.utils.GATKErrorReport; import org.broadinstitute.sting.utils.TextFormattingUtils; import org.broadinstitute.sting.utils.help.ApplicationDetails; diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 10ef3d375..ffc560bd9 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -38,6 +38,8 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import 
org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy; import org.broadinstitute.sting.gatk.executive.MicroScheduler; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; @@ -157,7 +159,11 @@ public class GenomeAnalysisEngine { locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); } - ShardStrategy shardStrategy = getShardStrategy(my_walker, microScheduler.getReference(), locs, argCollection.maximumEngineIterations); + ShardStrategy shardStrategy = getShardStrategy(my_walker, + microScheduler.getReference(), + locs, + argCollection.maximumEngineIterations, + readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null); // execute the microscheduler, storing the results return microScheduler.execute(my_walker, shardStrategy, argCollection.maximumEngineIterations); @@ -418,7 +424,7 @@ public class GenomeAnalysisEngine { argCollection.strictnessLevel, argCollection.downsampleFraction, argCollection.downsampleCoverage, - !argCollection.unsafe, + new ValidationExclusion(Arrays.asList(argCollection.unsafe)), filters, argCollection.readMaxPileup, walker.includeReadsWithDeletionAtLoci(), @@ -573,9 +579,10 @@ public class GenomeAnalysisEngine { protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals, - Integer maxIterations) { + Integer maxIterations, + ValidationExclusion exclusions) { if(readsDataSource != null && !readsDataSource.hasIndex()) { - if(!getArguments().unsafe || intervals != null) + if(!exclusions.contains(ValidationExclusion.TYPE.ALLOW_UNINDEXED_BAM) || intervals != null) 
throw new StingException("The GATK cannot currently process unindexed BAM files"); Shard.ShardType shardType; @@ -626,7 +633,7 @@ public class GenomeAnalysisEngine { SHARD_SIZE, maxIterations); } } else if (walker instanceof LocusWindowWalker) { - if ((intervals == null || intervals.isEmpty()) && !this.argCollection.unsafe) + if ((intervals == null || intervals.isEmpty()) && !exclusions.contains(ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST)) Utils.warnUser("walker is of type LocusWindow (which operates over intervals), but no intervals were provided." + "This may be unintentional, check your command-line arguments."); shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index 8e40f3495..0a4a33db8 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import java.io.File; import java.util.ArrayList; @@ -29,12 +30,12 @@ public class Reads { private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private Double downsamplingFraction = null; private Integer downsampleToCoverage = null; - private Boolean beSafe = null; + private ValidationExclusion exclusionList = null; private Collection supplementalFilters = null; private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT private boolean includeReadsWithDeletionAtLoci = false; private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels) - // immediately after the reference base such event is associated with? 
+// immediately after the reference base such event is associated with? /** @@ -102,8 +103,8 @@ public class Reads { * Return whether to 'verify' the reads as we pass through them. * @return Whether to verify the reads. */ - public Boolean getSafetyChecking() { - return beSafe; + public ValidationExclusion getValidationExclusionList() { + return exclusionList; } public Collection getSupplementalFilters() { @@ -117,6 +118,7 @@ public class Reads { public Reads( List readsFiles ) { this.readsFiles = readsFiles; this.supplementalFilters = new ArrayList(); + this.exclusionList = new ValidationExclusion(); } /** @@ -127,7 +129,7 @@ public class Reads { * @param strictness Stringency of reads file parsing. * @param downsampleFraction fraction of reads to downsample. * @param downsampleCoverage downsampling per-locus. - * @param beSafe Whether to enable safety checking. + * @param exclusionList what safety checks we're willing to let slide * @param supplementalFilters additional filters to dynamically apply. * @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with * a pile of indel/noevent extended events at every locus with at least one indel associated with it @@ -140,7 +142,7 @@ public class Reads { SAMFileReader.ValidationStringency strictness, Double downsampleFraction, Integer downsampleCoverage, - Boolean beSafe, + ValidationExclusion exclusionList, Collection supplementalFilters, int maximumReadsAtLocus, boolean includeReadsWithDeletionAtLoci, @@ -149,7 +151,7 @@ public class Reads { this.validationStringency = strictness; this.downsamplingFraction = downsampleFraction; this.downsampleToCoverage = downsampleCoverage; - this.beSafe = beSafe; + this.exclusionList = exclusionList == null ? 
new ValidationExclusion() : exclusionList; this.supplementalFilters = supplementalFilters; this.maximumReadsAtLocus = maximumReadsAtLocus; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java similarity index 93% rename from java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java rename to java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index f9e5645a6..63ce86b8c 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk; +package org.broadinstitute.sting.gatk.arguments; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.utils.StingException; @@ -121,7 +121,7 @@ public class GATKArgumentCollection { @Element(required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false) - public Boolean unsafe = false; + public ValidationExclusion.TYPE unsafe; @Element(required = false) @Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus. int.MAX_VALUE by default.", required = false) @@ -139,13 +139,14 @@ public class GATKArgumentCollection { /** What rule should we use when merging intervals */ @Element(required = false) @Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use {ALL [DEFAULT],OVERLAPPING_ONLY,NONE}.", required = false) - public INTERVAL_MERGING_RULE intervalMerging = INTERVAL_MERGING_RULE.ALL; + public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL; /** Should we enable rodWalkers? 
This is currently unsafe */ @Element(required = false) @Argument(fullName = "enableRodWalkers", shortName = "erw", doc = "Enable experimental rodWalker support. TEMPORARY HACK TO ALLOW EXPERIMENTATION WITH ROD WALKERS. [default is false]}.", required = false) public boolean enableRodWalkers = false; + /** * marshal the data out to a object * @@ -306,19 +307,5 @@ public class GATKArgumentCollection { return true; } - /** - * a class we use to determine the merging rules for intervals passed to the GATK - */ - public enum INTERVAL_MERGING_RULE { - ALL, // we merge both overlapping intervals and abutting intervals - OVERLAPPING_ONLY, // We merge intervals that are overlapping, but NOT ones that only abut each other - NONE; // we merge neither overlapping or abutting intervals, the list of intervals is sorted, but not merged - - public boolean check() { - if (this.compareTo(NONE) == 0) - throw new UnsupportedOperationException("We Currently do not support INTERVAL_MERGING_RULE.NONE"); - return true; - } - } } diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/IntervalMergingRule.java b/java/src/org/broadinstitute/sting/gatk/arguments/IntervalMergingRule.java new file mode 100644 index 000000000..c6c6c221c --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/arguments/IntervalMergingRule.java @@ -0,0 +1,17 @@ +package org.broadinstitute.sting.gatk.arguments; + + +/** + * a class we use to determine the merging rules for intervals passed to the GATK + */ +public enum IntervalMergingRule { + ALL, // we merge both overlapping intervals and abutting intervals + OVERLAPPING_ONLY, // We merge intervals that are overlapping, but NOT ones that only abut each other + NONE; // we merge neither overlapping or abutting intervals, the list of intervals is sorted, but not merged + + public boolean check() { + if (this.compareTo(NONE) == 0) + throw new UnsupportedOperationException("We Currently do not support IntervalMergingRule.NONE"); + return true; + } +} diff 
--git a/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java b/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java new file mode 100644 index 000000000..774081b5c --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java @@ -0,0 +1,55 @@ +package org.broadinstitute.sting.gatk.arguments; + +import org.broadinstitute.sting.utils.cmdLine.EnumerationArgumentDefault; + +import java.util.ArrayList; +import java.util.List; + + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + * @author aaron + * + * Class ValidationExclusion + * + * a class for containing the exclusions from validation that the user + * wants. + */ +public class ValidationExclusion { + + // our validation options + + public enum TYPE { + ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard + ALLOW_EMPTY_INTERVAL_LIST, // allow the user to pass in an empty interval list + ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set + NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file + @EnumerationArgumentDefault // set the ALL value to the default value, so if they specify just -U, we get the ALL + ALL // do not check for all of the above conditions, DEFAULT + } + + // a storage for the passed in exclusions + List exclusions = new ArrayList(); + + public ValidationExclusion(List exclusionsList) { + exclusions.addAll(exclusionsList); + } + + public ValidationExclusion() {} + + /** + * do we contain the exclusion specified, or were we set to ALL + * @param t the exclusion case to test for + * @return true if we contain the exclusion or if we're set to ALL, false otherwise + */ + public boolean contains(TYPE t) { + return (exclusions.contains(TYPE.ALL) || exclusions.contains(t)); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 4c68c7c55..b8e405374 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -11,6 +11,7 @@ import 
org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; @@ -19,7 +20,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; import java.io.File; -import java.util.List; import java.util.Collection; /* @@ -164,14 +164,14 @@ public class SAMDataSource implements SimpleDataSource { iterator = applyDecoratingIterators(true, iterator, reads.getDownsamplingFraction(), - reads.getSafetyChecking(), + reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), reads.getSupplementalFilters()); } else if (shard.getShardType() == Shard.ShardType.LOCUS) { iterator = seekLocus(shard); iterator = applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), - reads.getSafetyChecking(), + reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), reads.getSupplementalFilters()); } else if ((shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) || (shard.getShardType() == Shard.ShardType.READ_INTERVAL)) { @@ -179,7 +179,7 @@ public class SAMDataSource implements SimpleDataSource { iterator = applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), - reads.getSafetyChecking(), + reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), reads.getSupplementalFilters()); // add the new overlapping detection iterator, if we have a last interval and we're a read based shard @@ -430,21 +430,23 @@ public class SAMDataSource implements SimpleDataSource { * @param enableVerification Verify the 
order of reads. * @param wrappedIterator the raw data source. * @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus). - * @param beSafeP Another trigger for the verifying iterator? TODO: look into this. + * @param noValidationOfReadOrder true to skip read-order verification; the verifying iterator is applied only when this is false and enableVerification is true. * @param supplementalFilters additional filters to apply to the reads. * @return An iterator wrapped with filters reflecting the passed-in parameters. Will not be null. */ private StingSAMIterator applyDecoratingIterators(boolean enableVerification, StingSAMIterator wrappedIterator, Double downsamplingFraction, - Boolean beSafeP, + Boolean noValidationOfReadOrder, Collection supplementalFilters) { // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away if (downsamplingFraction != null) wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction); - if (beSafeP != null && beSafeP && enableVerification) + // if verification is enabled and the user has not opted out of read-order validation (!noValidationOfReadOrder), + // verify the read ordering by wrapping the iterator in a VerifyingSamIterator + if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(wrappedIterator); for( SamRecordFilter supplementalFilter: supplementalFilters ) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index a70ebcc5d..cd73787dd 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -33,6 +33,7 @@ import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.Reads; +import
org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; @@ -101,7 +102,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) { String msg = String.format("The GATK requires your bam have %s sort order, but your BAM file header %s. Continuing beyond this point is unsafe -- please update your BAM file to have a compatible sort order using samtools sort or Picard MergeBamFiles", this.sortOrder, reader.getFileHeader().getAttribute("SO") == null ? "is missing the SO sort order flag" : "has an SO flag set to " + reader.getFileHeader().getAttribute("SO")); - if (reads.getSafetyChecking()) { + if (!reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.ALLOW_UNSET_BAM_SORT_ORDER)) { throw new PicardException(msg); } else if (!warnedUserAboutSortOrder) { warnedUserAboutSortOrder = true; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java index 31cef5e47..605b6d118 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.utils.cmdLine.Argument; import java.util.ArrayList; diff --git 
a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 32c7f3040..7ee3fefee 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; diff --git a/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java b/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java index 3ea6d0cc4..8689d9bca 100644 --- a/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java +++ b/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.simpleframework.xml.Element; import org.simpleframework. 
xml.ElementList; import org.simpleframework.xml.Serializer; diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index c17b596ba..e32afee4d 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -7,8 +7,9 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import java.io.File; import java.util.ArrayList; @@ -137,7 +138,7 @@ public class GenomeLocParser { * @param rule the merging rule we're using * @return a list of sorted, merged intervals. */ - public static List parseIntervals(List intervalsSource, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List parseIntervals(List intervalsSource, IntervalMergingRule rule) { List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource); Collections.sort(parsedIntervals); return GenomeLocParser.mergeIntervalLocations(parsedIntervals, rule); @@ -202,7 +203,7 @@ public class GenomeLocParser { * * @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order */ - public static List parseGenomeLocs(final String str, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List parseGenomeLocs(final String str, IntervalMergingRule rule) { // Null string means no filter. 
if (str == null) return null; @@ -214,9 +215,7 @@ public class GenomeLocParser { for (String loc : str.split(";")) locs.add(parseGenomeLoc(loc.trim())); Collections.sort(locs); - //logger.info(String.format("Going to process %d locations", locs.length)); - locs = mergeIntervalLocations(locs, rule); - logger.debug("Locations are:" + Utils.join(", ", locs)); + locs = mergeIntervalLocations(locs, rule); return locs; } catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e); @@ -242,9 +241,9 @@ public class GenomeLocParser { * * @return the list of merged locations */ - public static List mergeIntervalLocations(final List raw, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List mergeIntervalLocations(final List raw, IntervalMergingRule rule) { logger.debug(" Raw locations are: " + Utils.join(", ", raw)); - if (raw.size() <= 1 || rule == GATKArgumentCollection.INTERVAL_MERGING_RULE.NONE) + if (raw.size() <= 1 || rule == IntervalMergingRule.NONE) return raw; else { ArrayList merged = new ArrayList(); @@ -254,7 +253,7 @@ public class GenomeLocParser { GenomeLoc curr = it.next(); if (prev.overlapsP(curr)) { prev = prev.merge(curr); - } else if (prev.contiguousP(curr) && rule == GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL) { + } else if (prev.contiguousP(curr) && rule == IntervalMergingRule.ALL) { prev = prev.merge(curr); } else { merged.add(prev); @@ -315,7 +314,7 @@ public class GenomeLocParser { * @param file_name * @param rule also merge abutting intervals */ - public static List intervalFileToList(final String file_name, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List intervalFileToList(final String file_name, IntervalMergingRule rule) { /** * first try to read it as an interval file 
since that's well structured * we'll fail quickly if it's not a valid file. Then try to parse it as @@ -327,7 +326,7 @@ public class GenomeLocParser { // sometimes we see an empty file passed as a parameter, if so return an empty list if (inputFile.exists() && inputFile.length() < 1) { - if (GenomeAnalysisEngine.instance.getArguments().unsafe) + if (GenomeAnalysisEngine.instance.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST) return new ArrayList(); else { Utils.warnUser("The interval file " + file_name + " is empty. The GATK will continue processing but you " + diff --git a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index a8a8dcce8..7ec3637cc 100644 --- a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.bed; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -104,7 +104,7 @@ public class BedParser { * @param rule the rule to merge intervals with * @return a list of genome locs, sorted and merged */ - public List getSortedAndMergedLocations(GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public List getSortedAndMergedLocations(IntervalMergingRule rule) { List locs = new ArrayList(); locs.addAll(mLocations); Collections.sort(locs); diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java index e0597d513..9c7db1d01 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java 
@@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.cmdLine; import org.broadinstitute.sting.utils.StingException; import org.apache.log4j.Logger; +import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.util.*; @@ -270,8 +271,14 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { return valueOf.invoke(null,value.trim()); } else if (type.isEnum()) { Object[] vals = type.getEnumConstants(); - for (Object val : vals) + Object defaultEnumeration = null; // as we look at options, record the default option if it exists + for (Object val : vals) { if (String.valueOf(val).equalsIgnoreCase(value)) return val; + try { if (type.getField(val.toString()).isAnnotationPresent(EnumerationArgumentDefault.class)) defaultEnumeration = val; } + catch (NoSuchFieldException e) { throw new StingException("parsing " + type.toString() + "doesn't contain the field " + val.toString()); } + } + // if their argument has no value (null), and there's a default, return that default for the enum value + if (defaultEnumeration != null && value == null) return defaultEnumeration; throw new UnknownEnumeratedValueException(value, type.getName()); } else { Constructor ctor = type.getConstructor(String.class); diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/EnumerationArgumentDefault.java b/java/src/org/broadinstitute/sting/utils/cmdLine/EnumerationArgumentDefault.java new file mode 100644 index 000000000..62e8d8b4b --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/EnumerationArgumentDefault.java @@ -0,0 +1,40 @@ +package org.broadinstitute.sting.utils.cmdLine; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * @author aaron + *

+ * Annotation EnumerationArgumentDefault + *

+ * Allows the default argument value to be set for an enum; this allows us to treat enums as + * booleans on the command line. I.e. + * + * if we're using an enum Shape, + * + * enum shape { + * SQUARE, + * CIRCLE, + * @EnumerationArgumentDefault + * TRIANGLE + * } + * + * and a command line option -shape, the EnumerationArgumentDefault would allow you to say: + * -shape + * or + * -shape TRIANGLE + * + * would get -shape set to TRIANGLE, where: + * + * -shape SQUARE + * + * would set shape to SQUARE + * + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface EnumerationArgumentDefault { +} diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionTest.java similarity index 95% rename from java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java rename to java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionTest.java index 945db5180..74f313079 100755 --- a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java +++ b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionTest.java @@ -1,6 +1,7 @@ -package org.broadinstitute.sting.gatk; +package org.broadinstitute.sting.gatk.arguments; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.junit.After; import static org.junit.Assert.fail; import org.junit.Before; @@ -85,7 +86,7 @@ public class GATKArgumentCollectionTest extends BaseTest { collect.DBSNPFile = "DBSNPFile".toLowerCase(); collect.HAPMAPFile = "HAPMAPFile".toLowerCase(); collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase(); - collect.unsafe = false; + collect.unsafe = ValidationExclusion.TYPE.ALL; collect.downsampleFraction = null; collect.downsampleCoverage = null; collect.intervals = new ArrayList(); diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java 
b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java index c8220ee38..8f30428d8 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java @@ -4,7 +4,7 @@ import static junit.framework.Assert.assertTrue; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import static org.junit.Assert.assertEquals; @@ -83,33 +83,33 @@ public class GenomeLocParserTest extends BaseTest { @Test(expected = RuntimeException.class) public void testParseBadLocations() { - GenomeLocParser.parseGenomeLocs("chr1:1-1;badChr:1-0", GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + GenomeLocParser.parseGenomeLocs("chr1:1-1;badChr:1-0", IntervalMergingRule.ALL); } @Test public void testParseGoodLocations() { - GenomeLocParser.parseGenomeLocs("chr1:1-1;chr1:5-9", GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + GenomeLocParser.parseGenomeLocs("chr1:1-1;chr1:5-9", IntervalMergingRule.ALL); } @Test(expected = RuntimeException.class) public void testParseGoodLocationsTooManySemiColons() { - GenomeLocParser.parseGenomeLocs("chr1:1-1;;chr1:5-9;", GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + GenomeLocParser.parseGenomeLocs("chr1:1-1;;chr1:5-9;", IntervalMergingRule.ALL); } @Test public void testOverlappingGoodLocationsWithAbuttingFlag() { - List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", GATKArgumentCollection.INTERVAL_MERGING_RULE.OVERLAPPING_ONLY); + List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", IntervalMergingRule.OVERLAPPING_ONLY); assertEquals(1, locs.size()); } @Test public void testAbuttingGoodLocationsWithAbuttingOffFlag() { - List locs = GenomeLocParser.parseGenomeLocs("chr1:1-4;chr1:5-9", 
GATKArgumentCollection.INTERVAL_MERGING_RULE.OVERLAPPING_ONLY); + List locs = GenomeLocParser.parseGenomeLocs("chr1:1-4;chr1:5-9", IntervalMergingRule.OVERLAPPING_ONLY); assertEquals(2, locs.size()); } @Test public void testAbuttingGoodLocationsWithNoneFlag() { - List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", GATKArgumentCollection.INTERVAL_MERGING_RULE.NONE); + List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", IntervalMergingRule.NONE); assertEquals(2, locs.size()); } diff --git a/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java b/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java index ef3d17b9b..914bf78c7 100644 --- a/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java +++ b/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.bed; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -12,7 +12,6 @@ import org.junit.Assert; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; import java.util.List; @@ -64,7 +63,7 @@ public class BedParserTest extends BaseTest { @Test public void testLoadBedFileOverlapping() { BedParser parser = new BedParser(bedFile); - List location = parser.getSortedAndMergedLocations(GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + List location = parser.getSortedAndMergedLocations(IntervalMergingRule.ALL); Assert.assertEquals(3, location.size()); } }