diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 782ededa0..19de5dd48 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; import org.broadinstitute.sting.utils.cmdLine.ArgumentTypeDescriptor; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 79f114955..6256bb02b 100755 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.utils.GATKErrorReport; import org.broadinstitute.sting.utils.TextFormattingUtils; import org.broadinstitute.sting.utils.help.ApplicationDetails; diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 10ef3d375..ffc560bd9 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -38,6 +38,8 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy; import org.broadinstitute.sting.gatk.executive.MicroScheduler; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import 
org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; @@ -157,7 +159,11 @@ public class GenomeAnalysisEngine { locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); } - ShardStrategy shardStrategy = getShardStrategy(my_walker, microScheduler.getReference(), locs, argCollection.maximumEngineIterations); + ShardStrategy shardStrategy = getShardStrategy(my_walker, + microScheduler.getReference(), + locs, + argCollection.maximumEngineIterations, + readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : new ValidationExclusion()); // execute the microscheduler, storing the results return microScheduler.execute(my_walker, shardStrategy, argCollection.maximumEngineIterations); @@ -418,7 +424,7 @@ public class GenomeAnalysisEngine { argCollection.strictnessLevel, argCollection.downsampleFraction, argCollection.downsampleCoverage, - !argCollection.unsafe, + new ValidationExclusion(Arrays.asList(argCollection.unsafe)), filters, argCollection.readMaxPileup, walker.includeReadsWithDeletionAtLoci(), @@ -573,9 +579,10 @@ public class GenomeAnalysisEngine { protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals, - Integer maxIterations) { + Integer maxIterations, + ValidationExclusion exclusions) { if(readsDataSource != null && !readsDataSource.hasIndex()) { - if(!getArguments().unsafe || intervals != null) + if(!exclusions.contains(ValidationExclusion.TYPE.ALLOW_UNINDEXED_BAM) || intervals != null) throw new StingException("The GATK cannot currently process unindexed BAM files"); Shard.ShardType shardType; @@ -626,7 +633,7 @@ public class GenomeAnalysisEngine { SHARD_SIZE, maxIterations); } } else if (walker instanceof LocusWindowWalker) { - if ((intervals == null || 
intervals.isEmpty()) && !this.argCollection.unsafe) + if ((intervals == null || intervals.isEmpty()) && !exclusions.contains(ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST)) Utils.warnUser("walker is of type LocusWindow (which operates over intervals), but no intervals were provided." + "This may be unintentional, check your command-line arguments."); shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index 8e40f3495..0a4a33db8 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import java.io.File; import java.util.ArrayList; @@ -29,12 +30,12 @@ public class Reads { private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private Double downsamplingFraction = null; private Integer downsampleToCoverage = null; - private Boolean beSafe = null; + private ValidationExclusion exclusionList = null; private Collection supplementalFilters = null; private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT private boolean includeReadsWithDeletionAtLoci = false; private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels) - // immediately after the reference base such event is associated with? +// immediately after the reference base such event is associated with? /** @@ -102,8 +103,8 @@ public class Reads { * Return whether to 'verify' the reads as we pass through them. * @return Whether to verify the reads. 
*/ - public Boolean getSafetyChecking() { - return beSafe; + public ValidationExclusion getValidationExclusionList() { + return exclusionList; } public Collection getSupplementalFilters() { @@ -117,6 +118,7 @@ public class Reads { public Reads( List readsFiles ) { this.readsFiles = readsFiles; this.supplementalFilters = new ArrayList(); + this.exclusionList = new ValidationExclusion(); } /** @@ -127,7 +129,7 @@ public class Reads { * @param strictness Stringency of reads file parsing. * @param downsampleFraction fraction of reads to downsample. * @param downsampleCoverage downsampling per-locus. - * @param beSafe Whether to enable safety checking. + * @param exclusionList what safety checks we're willing to let slide * @param supplementalFilters additional filters to dynamically apply. * @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with * a pile of indel/noevent extended events at every locus with at least one indel associated with it @@ -140,7 +142,7 @@ public class Reads { SAMFileReader.ValidationStringency strictness, Double downsampleFraction, Integer downsampleCoverage, - Boolean beSafe, + ValidationExclusion exclusionList, Collection supplementalFilters, int maximumReadsAtLocus, boolean includeReadsWithDeletionAtLoci, @@ -149,7 +151,7 @@ public class Reads { this.validationStringency = strictness; this.downsamplingFraction = downsampleFraction; this.downsampleToCoverage = downsampleCoverage; - this.beSafe = beSafe; + this.exclusionList = exclusionList == null ? 
new ValidationExclusion() : exclusionList; this.supplementalFilters = supplementalFilters; this.maximumReadsAtLocus = maximumReadsAtLocus; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java similarity index 93% rename from java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java rename to java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index f9e5645a6..63ce86b8c 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk; +package org.broadinstitute.sting.gatk.arguments; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.utils.StingException; @@ -121,7 +121,7 @@ public class GATKArgumentCollection { @Element(required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false) - public Boolean unsafe = false; + public ValidationExclusion.TYPE unsafe; @Element(required = false) @Argument(fullName = "max_reads_at_locus", shortName = "mrl", doc = "Sets the upper limit for the number of reads presented at a single locus. int.MAX_VALUE by default.", required = false) @@ -139,13 +139,14 @@ public class GATKArgumentCollection { /** What rule should we use when merging intervals */ @Element(required = false) @Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use {ALL [DEFAULT],OVERLAPPING_ONLY,NONE}.", required = false) - public INTERVAL_MERGING_RULE intervalMerging = INTERVAL_MERGING_RULE.ALL; + public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL; /** Should we enable rodWalkers? 
This is currently unsafe */ @Element(required = false) @Argument(fullName = "enableRodWalkers", shortName = "erw", doc = "Enable experimental rodWalker support. TEMPORARY HACK TO ALLOW EXPERIMENTATION WITH ROD WALKERS. [default is false]}.", required = false) public boolean enableRodWalkers = false; + /** * marshal the data out to a object * @@ -306,19 +307,5 @@ public class GATKArgumentCollection { return true; } - /** - * a class we use to determine the merging rules for intervals passed to the GATK - */ - public enum INTERVAL_MERGING_RULE { - ALL, // we merge both overlapping intervals and abutting intervals - OVERLAPPING_ONLY, // We merge intervals that are overlapping, but NOT ones that only abut each other - NONE; // we merge neither overlapping or abutting intervals, the list of intervals is sorted, but not merged - - public boolean check() { - if (this.compareTo(NONE) == 0) - throw new UnsupportedOperationException("We Currently do not support INTERVAL_MERGING_RULE.NONE"); - return true; - } - } } diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/IntervalMergingRule.java b/java/src/org/broadinstitute/sting/gatk/arguments/IntervalMergingRule.java new file mode 100644 index 000000000..c6c6c221c --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/arguments/IntervalMergingRule.java @@ -0,0 +1,17 @@ +package org.broadinstitute.sting.gatk.arguments; + + +/** + * a class we use to determine the merging rules for intervals passed to the GATK + */ +public enum IntervalMergingRule { + ALL, // we merge both overlapping intervals and abutting intervals + OVERLAPPING_ONLY, // We merge intervals that are overlapping, but NOT ones that only abut each other + NONE; // we merge neither overlapping or abutting intervals, the list of intervals is sorted, but not merged + + public boolean check() { + if (this.compareTo(NONE) == 0) + throw new UnsupportedOperationException("We Currently do not support IntervalMergingRule.NONE"); + return true; + } +} diff 
--git a/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java b/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java new file mode 100644 index 000000000..774081b5c --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java @@ -0,0 +1,55 @@ +package org.broadinstitute.sting.gatk.arguments; + +import org.broadinstitute.sting.utils.cmdLine.EnumerationArgumentDefault; + +import java.util.ArrayList; +import java.util.List; + + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + * @author aaron + * + * Class ValidationExclusion + * + * a class for containing the exclusions from validation that the user + * wants. + */ +public class ValidationExclusion { + + // our validation options + + public enum TYPE { + ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard + ALLOW_EMPTY_INTERVAL_LIST, // allow the user to pass in an empty interval list + ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set + NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file + @EnumerationArgumentDefault // set the ALL value to the default value, so if they specify just -U, we get the ALL + ALL // do not check for all of the above conditions, DEFAULT + } + + // a storage for the passed in exclusions + List exclusions = new ArrayList(); + + public ValidationExclusion(List exclusionsList) { + exclusions.addAll(exclusionsList); + } + + public ValidationExclusion() {} + + /** + * do we contain the exclusion specified, or were we set to ALL + * @param t the exclusion case to test for + * @return true if we contain the exclusion or if we're set to ALL, false otherwise + */ + public boolean contains(TYPE t) { + return (exclusions.contains(TYPE.ALL) || exclusions.contains(t)); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 4c68c7c55..b8e405374 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -11,6 +11,7 @@ import 
org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; @@ -19,7 +20,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; import java.io.File; -import java.util.List; import java.util.Collection; /* @@ -164,14 +164,14 @@ public class SAMDataSource implements SimpleDataSource { iterator = applyDecoratingIterators(true, iterator, reads.getDownsamplingFraction(), - reads.getSafetyChecking(), + reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), reads.getSupplementalFilters()); } else if (shard.getShardType() == Shard.ShardType.LOCUS) { iterator = seekLocus(shard); iterator = applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), - reads.getSafetyChecking(), + reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), reads.getSupplementalFilters()); } else if ((shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) || (shard.getShardType() == Shard.ShardType.READ_INTERVAL)) { @@ -179,7 +179,7 @@ public class SAMDataSource implements SimpleDataSource { iterator = applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), - reads.getSafetyChecking(), + reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), reads.getSupplementalFilters()); // add the new overlapping detection iterator, if we have a last interval and we're a read based shard @@ -430,21 +430,23 @@ public class SAMDataSource implements SimpleDataSource { * @param enableVerification Verify the 
order of reads. * @param wrappedIterator the raw data source. * @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus). - * @param beSafeP Another trigger for the verifying iterator? TODO: look into this. + * @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this. * @param supplementalFilters additional filters to apply to the reads. * @return An iterator wrapped with filters reflecting the passed-in parameters. Will not be null. */ private StingSAMIterator applyDecoratingIterators(boolean enableVerification, StingSAMIterator wrappedIterator, Double downsamplingFraction, - Boolean beSafeP, + Boolean noValidationOfReadOrder, Collection supplementalFilters) { // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away if (downsamplingFraction != null) wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction); - if (beSafeP != null && beSafeP && enableVerification) + // unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification, + // verify the read ordering by applying a sort order iterator + if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(wrappedIterator); for( SamRecordFilter supplementalFilter: supplementalFilters ) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index a70ebcc5d..cd73787dd 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -33,6 +33,7 @@ import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.Reads; +import 
org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; @@ -101,7 +102,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) { String msg = String.format("The GATK requires your bam have %s sort order, but your BAM file header %s. Continuing beyond this point is unsafe -- please update your BAM file to have a compatible sort order using samtools sort or Picard MergeBamFiles", this.sortOrder, reader.getFileHeader().getAttribute("SO") == null ? "is missing the SO sort order flag" : "has an SO flag set to " + reader.getFileHeader().getAttribute("SO")); - if (reads.getSafetyChecking()) { + if (!reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.ALLOW_UNSET_BAM_SORT_ORDER)) { throw new PicardException(msg); } else if (!warnedUserAboutSortOrder) { warnedUserAboutSortOrder = true; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java index 31cef5e47..605b6d118 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculatePhaseLikelihoodsWalker.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.utils.cmdLine.Argument; import java.util.ArrayList; diff --git 
a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 32c7f3040..7ee3fefee 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; diff --git a/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java b/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java index 3ea6d0cc4..8689d9bca 100644 --- a/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java +++ b/java/src/org/broadinstitute/sting/utils/GATKErrorReport.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.simpleframework.xml.Element; import org.simpleframework. 
xml.ElementList; import org.simpleframework.xml.Serializer; diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index c17b596ba..e32afee4d 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -7,8 +7,9 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import java.io.File; import java.util.ArrayList; @@ -137,7 +138,7 @@ public class GenomeLocParser { * @param rule the merging rule we're using * @return a list of sorted, merged intervals. */ - public static List parseIntervals(List intervalsSource, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List parseIntervals(List intervalsSource, IntervalMergingRule rule) { List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource); Collections.sort(parsedIntervals); return GenomeLocParser.mergeIntervalLocations(parsedIntervals, rule); @@ -202,7 +203,7 @@ public class GenomeLocParser { * * @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order */ - public static List parseGenomeLocs(final String str, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List parseGenomeLocs(final String str, IntervalMergingRule rule) { // Null string means no filter. 
if (str == null) return null; @@ -214,9 +215,7 @@ public class GenomeLocParser { for (String loc : str.split(";")) locs.add(parseGenomeLoc(loc.trim())); Collections.sort(locs); - //logger.info(String.format("Going to process %d locations", locs.length)); - locs = mergeIntervalLocations(locs, rule); - logger.debug("Locations are:" + Utils.join(", ", locs)); + locs = mergeIntervalLocations(locs, rule); return locs; } catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e); @@ -242,9 +241,9 @@ public class GenomeLocParser { * * @return the list of merged locations */ - public static List mergeIntervalLocations(final List raw, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List mergeIntervalLocations(final List raw, IntervalMergingRule rule) { logger.debug(" Raw locations are: " + Utils.join(", ", raw)); - if (raw.size() <= 1 || rule == GATKArgumentCollection.INTERVAL_MERGING_RULE.NONE) + if (raw.size() <= 1 || rule == IntervalMergingRule.NONE) return raw; else { ArrayList merged = new ArrayList(); @@ -254,7 +253,7 @@ public class GenomeLocParser { GenomeLoc curr = it.next(); if (prev.overlapsP(curr)) { prev = prev.merge(curr); - } else if (prev.contiguousP(curr) && rule == GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL) { + } else if (prev.contiguousP(curr) && rule == IntervalMergingRule.ALL) { prev = prev.merge(curr); } else { merged.add(prev); @@ -315,7 +314,7 @@ public class GenomeLocParser { * @param file_name * @param rule also merge abutting intervals */ - public static List intervalFileToList(final String file_name, GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public static List intervalFileToList(final String file_name, IntervalMergingRule rule) { /** * first try to read it as an interval file 
since that's well structured * we'll fail quickly if it's not a valid file. Then try to parse it as @@ -327,7 +326,7 @@ public class GenomeLocParser { // sometimes we see an empty file passed as a parameter, if so return an empty list if (inputFile.exists() && inputFile.length() < 1) { - if (GenomeAnalysisEngine.instance.getArguments().unsafe) + if (GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST || GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALL) return new ArrayList(); else { Utils.warnUser("The interval file " + file_name + " is empty. The GATK will continue processing but you " + diff --git a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index a8a8dcce8..7ec3637cc 100644 --- a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.bed; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -104,7 +104,7 @@ public class BedParser { * @param rule the rule to merge intervals with * @return a list of genome locs, sorted and merged */ - public List getSortedAndMergedLocations(GATKArgumentCollection.INTERVAL_MERGING_RULE rule) { + public List getSortedAndMergedLocations(IntervalMergingRule rule) { List locs = new ArrayList(); locs.addAll(mLocations); Collections.sort(locs); diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java index e0597d513..9c7db1d01 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java 
@@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.cmdLine; import org.broadinstitute.sting.utils.StingException; import org.apache.log4j.Logger; +import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.util.*; @@ -270,8 +271,14 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { return valueOf.invoke(null,value.trim()); } else if (type.isEnum()) { Object[] vals = type.getEnumConstants(); - for (Object val : vals) + Object defaultEnumeration = null; // as we look at options, record the default option if it exists + for (Object val : vals) { if (String.valueOf(val).equalsIgnoreCase(value)) return val; + try { if (type.getField(val.toString()).isAnnotationPresent(EnumerationArgumentDefault.class)) defaultEnumeration = val; } + catch (NoSuchFieldException e) { throw new StingException("parsing " + type.toString() + " doesn't contain the field " + val.toString(), e); } + } + // if their argument has no value (null), and there's a default, return that default for the enum value + if (defaultEnumeration != null && value == null) return defaultEnumeration; throw new UnknownEnumeratedValueException(value, type.getName()); } else { Constructor ctor = type.getConstructor(String.class); diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/EnumerationArgumentDefault.java b/java/src/org/broadinstitute/sting/utils/cmdLine/EnumerationArgumentDefault.java new file mode 100644 index 000000000..62e8d8b4b --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/EnumerationArgumentDefault.java @@ -0,0 +1,40 @@ +package org.broadinstitute.sting.utils.cmdLine; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * @author aaron + *

+ * Annotation EnumerationArgumentDefault + *

+ * Allows the default argument value to be set for an enum; this allows us to treat enums as + * booleans on the command line. I.e. + * + * if we're using an enum Shape, + * + * enum shape { + * SQUARE, + * CIRCLE, + * @EnumerationArgumentDefault + * TRIANGLE + * } + * + * and a command line option -shape, the EnumerationArgumentDefault would allow you to say: + * -shape + * or + * -shape TRIANGLE + * + * would get -shape set to TRIANGLE, where: + * + * -shape SQUARE + * + * would set shape to SQUARE + * + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface EnumerationArgumentDefault { +} diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionTest.java similarity index 95% rename from java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java rename to java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionTest.java index 945db5180..74f313079 100755 --- a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java +++ b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionTest.java @@ -1,6 +1,7 @@ -package org.broadinstitute.sting.gatk; +package org.broadinstitute.sting.gatk.arguments; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.junit.After; import static org.junit.Assert.fail; import org.junit.Before; @@ -85,7 +86,7 @@ public class GATKArgumentCollectionTest extends BaseTest { collect.DBSNPFile = "DBSNPFile".toLowerCase(); collect.HAPMAPFile = "HAPMAPFile".toLowerCase(); collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase(); - collect.unsafe = false; + collect.unsafe = ValidationExclusion.TYPE.ALL; collect.downsampleFraction = null; collect.downsampleCoverage = null; collect.intervals = new ArrayList(); diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java 
b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java index c8220ee38..8f30428d8 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java @@ -4,7 +4,7 @@ import static junit.framework.Assert.assertTrue; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import static org.junit.Assert.assertEquals; @@ -83,33 +83,33 @@ public class GenomeLocParserTest extends BaseTest { @Test(expected = RuntimeException.class) public void testParseBadLocations() { - GenomeLocParser.parseGenomeLocs("chr1:1-1;badChr:1-0", GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + GenomeLocParser.parseGenomeLocs("chr1:1-1;badChr:1-0", IntervalMergingRule.ALL); } @Test public void testParseGoodLocations() { - GenomeLocParser.parseGenomeLocs("chr1:1-1;chr1:5-9", GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + GenomeLocParser.parseGenomeLocs("chr1:1-1;chr1:5-9", IntervalMergingRule.ALL); } @Test(expected = RuntimeException.class) public void testParseGoodLocationsTooManySemiColons() { - GenomeLocParser.parseGenomeLocs("chr1:1-1;;chr1:5-9;", GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + GenomeLocParser.parseGenomeLocs("chr1:1-1;;chr1:5-9;", IntervalMergingRule.ALL); } @Test public void testOverlappingGoodLocationsWithAbuttingFlag() { - List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", GATKArgumentCollection.INTERVAL_MERGING_RULE.OVERLAPPING_ONLY); + List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", IntervalMergingRule.OVERLAPPING_ONLY); assertEquals(1, locs.size()); } @Test public void testAbuttingGoodLocationsWithAbuttingOffFlag() { - List locs = GenomeLocParser.parseGenomeLocs("chr1:1-4;chr1:5-9", 
GATKArgumentCollection.INTERVAL_MERGING_RULE.OVERLAPPING_ONLY); + List locs = GenomeLocParser.parseGenomeLocs("chr1:1-4;chr1:5-9", IntervalMergingRule.OVERLAPPING_ONLY); assertEquals(2, locs.size()); } @Test public void testAbuttingGoodLocationsWithNoneFlag() { - List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", GATKArgumentCollection.INTERVAL_MERGING_RULE.NONE); + List locs = GenomeLocParser.parseGenomeLocs("chr1:1-8;chr1:5-9", IntervalMergingRule.NONE); assertEquals(2, locs.size()); } diff --git a/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java b/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java index ef3d17b9b..914bf78c7 100644 --- a/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java +++ b/java/test/org/broadinstitute/sting/utils/bed/BedParserTest.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.bed; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -12,7 +12,6 @@ import org.junit.Assert; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; import java.util.List; @@ -64,7 +63,7 @@ public class BedParserTest extends BaseTest { @Test public void testLoadBedFileOverlapping() { BedParser parser = new BedParser(bedFile); - List location = parser.getSortedAndMergedLocations(GATKArgumentCollection.INTERVAL_MERGING_RULE.ALL); + List location = parser.getSortedAndMergedLocations(IntervalMergingRule.ALL); Assert.assertEquals(3, location.size()); } }