diff --git a/build.xml b/build.xml
index fc495f7cc..60c678591 100644
--- a/build.xml
+++ b/build.xml
@@ -520,6 +520,8 @@
+
+
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
index 9f92df6e0..8e3f753a8 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
@@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable {
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
public boolean matches( ArgumentDefinition definition, Object key ) {
- return definition.validation != null;
+ // We can perform some sort of validation for anything that isn't a flag.
+ return !definition.isFlag;
}
};
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
index 60ed8c899..351583c07 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
@@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable {
public final String label;
/**
- * Maps indicies of command line arguments to values paired with that argument.
+ * Maps indices of command line arguments to values paired with that argument.
*/
public final SortedMap> indices = new TreeMap>();
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
index edb212f2c..a580a1240 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
@@ -41,6 +41,11 @@ import java.util.*;
* A parser for Sting command-line arguments.
*/
public class ParsingEngine {
+ /**
+ * Maps each loaded argument definition back to the argument source that created it.
+ */
+ private Map argumentSourcesByDefinition = new HashMap();
+
/**
* A list of defined arguments against which command lines are matched.
* Package protected for testing access.
@@ -113,8 +118,13 @@ public class ParsingEngine {
*/
public void addArgumentSource( String sourceName, Class sourceClass ) {
List argumentsFromSource = new ArrayList();
- for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) )
- argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() );
+ for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) {
+ List argumentDefinitions = argumentSource.createArgumentDefinitions();
+ for(ArgumentDefinition argumentDefinition: argumentDefinitions) {
+ argumentSourcesByDefinition.put(argumentDefinition,argumentSource);
+ argumentsFromSource.add( argumentDefinition );
+ }
+ }
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
}
@@ -205,16 +215,25 @@ public class ParsingEngine {
throw new InvalidArgumentException( invalidArguments );
}
- // Find invalid argument values (arguments that fail the regexp test.
+ // Find invalid argument values -- a value is invalid if it is missing entirely or fails the specified 'validation' regular expression.
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
Collection verifiableArguments =
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
Collection> invalidValues = new ArrayList>();
for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
+ // Check to see whether an argument value was specified. Argument values must be provided
+ // when the argument name is specified and the argument is not a flag type.
+ for(ArgumentMatch verifiableMatch: verifiableMatches) {
+ ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument);
+ if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault())
+ invalidValues.add(new Pair(verifiableArgument,null));
+ }
+
+ // Ensure that the field contents meet the validation criteria specified by the regular expression.
for( ArgumentMatch verifiableMatch: verifiableMatches ) {
for( String value: verifiableMatch.values() ) {
- if( !value.matches(verifiableArgument.validation) )
+ if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) )
invalidValues.add( new Pair(verifiableArgument, value) );
}
}
@@ -527,10 +546,14 @@ class InvalidArgumentValueException extends ArgumentException {
private static String formatArguments( Collection> invalidArgumentValues ) {
StringBuilder sb = new StringBuilder();
for( Pair invalidValue: invalidArgumentValues ) {
- sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
- invalidValue.first.fullName,
- invalidValue.second,
- invalidValue.first.validation) );
+ if(invalidValue.getSecond() == null)
+ sb.append( String.format("%nArgument '--%s' requires a value but none was provided",
+ invalidValue.first.fullName) );
+ else
+ sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
+ invalidValue.first.fullName,
+ invalidValue.second,
+ invalidValue.first.validation) );
}
return sb.toString();
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 6064806f3..572970349 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -893,6 +893,7 @@ public class SAMDataSource {
* Custom representation of interval bounds.
* Makes it simpler to track current position.
*/
+ private int[] intervalContigIndices;
private int[] intervalStarts;
private int[] intervalEnds;
@@ -917,12 +918,14 @@ public class SAMDataSource {
if(foundMappedIntervals) {
if(keepOnlyUnmappedReads)
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
+ this.intervalContigIndices = new int[intervals.size()];
this.intervalStarts = new int[intervals.size()];
this.intervalEnds = new int[intervals.size()];
int i = 0;
for(GenomeLoc interval: intervals) {
- intervalStarts[i] = (int)interval.getStart();
- intervalEnds[i] = (int)interval.getStop();
+ intervalContigIndices[i] = interval.getContigIndex();
+ intervalStarts[i] = interval.getStart();
+ intervalEnds[i] = interval.getStop();
i++;
}
}
@@ -961,11 +964,10 @@ public class SAMDataSource {
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
if(!keepOnlyUnmappedReads) {
// Mapped read filter; check against GenomeLoc-derived bounds.
- if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] ||
- (candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) {
- // This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval.
+ if(readEndsOnOrAfterStartingBound(candidateRead)) {
+ // This read ends after the current interval begins.
// Promising, but this read must be checked against the ending bound.
- if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) {
+ if(readStartsOnOrBeforeEndingBound(candidateRead)) {
// Yes, this read is within both bounds. This must be our next read.
nextRead = candidateRead;
break;
@@ -993,6 +995,37 @@ public class SAMDataSource {
candidateRead = iterator.next();
}
}
+
+ /**
+ * Check whether the read lies on or after the start of the current bound. If the read is unmapped but placed, its
+ * end will be distorted, so rely only on the alignment start.
+ * @param read The read to position-check.
+ * @return True if the read ends (or, if unmapped but placed, starts) on or after the start of the current bound. False otherwise.
+ */
+ private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
+ return
+ // Read ends on a later contig, or...
+ read.getReferenceIndex() > intervalContigIndices[currentBound] ||
+ // Read ends on this contig...
+ (read.getReferenceIndex() == intervalContigIndices[currentBound] &&
+ // either after this location, or...
+ (read.getAlignmentEnd() >= intervalStarts[currentBound] ||
+ // read is unmapped but positioned and alignment start is on or after this start point.
+ (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
+ }
+
+ /**
+ * Check whether the read starts on or before the end of the current bound.
+ * @param read The read to position-check.
+ * @return True if the read starts on or before the end of the current bound. False otherwise.
+ */
+ private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
+ return
+ // Read starts on a prior contig, or...
+ read.getReferenceIndex() < intervalContigIndices[currentBound] ||
+ // Read starts on this contig and the alignment start is registered before this end point.
+ (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
+ }
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
index 5f8f19892..4a4f6f6af 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
@@ -342,12 +342,12 @@ public class DiffEngine {
return reader.readFromFile(file, maxElementsToRead);
}
- public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
+ public static boolean simpleDiffFiles(File masterFile, File testFile, int maxElementsToRead, DiffEngine.SummaryReportParams params) {
DiffEngine diffEngine = new DiffEngine();
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
- DiffElement master = diffEngine.createDiffableFromFile(masterFile);
- DiffElement test = diffEngine.createDiffableFromFile(testFile);
+ DiffElement master = diffEngine.createDiffableFromFile(masterFile, maxElementsToRead);
+ DiffElement test = diffEngine.createDiffableFromFile(testFile, maxElementsToRead);
List diffs = diffEngine.diff(master, test);
diffEngine.reportSummarizedDifferences(diffs, params);
return true;
diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java
index bea9eaec5..0194e114a 100644
--- a/public/java/test/org/broadinstitute/sting/MD5DB.java
+++ b/public/java/test/org/broadinstitute/sting/MD5DB.java
@@ -47,6 +47,7 @@ public class MD5DB {
/**
* Subdirectory under the ant build directory where we store integration test md5 results
*/
+ private static final int MAX_RECORDS_TO_READ = 10000;
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
@@ -78,8 +79,8 @@ public class MD5DB {
* @return
*/
public static String getMD5FilePath(final String md5, final String valueIfNotFound) {
- // we prefer the local db to the global DB, so match it first
- for ( String dir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR)) {
+ // we prefer the global db to the local DB, so match it first
+ for ( String dir : Arrays.asList(GLOBAL_MD5_DB_DIR, LOCAL_MD5_DB_DIR)) {
File f = getFileForMD5(md5, dir);
if ( f.exists() && f.canRead() )
return f.getPath();
@@ -232,7 +233,7 @@ public class MD5DB {
// inline differences
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
- boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
+ boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params);
if ( success )
System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
pathToExpectedMD5File, pathToFileMD5File);