diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..8623fa076
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,20 @@
+/*.bam
+/*.bai
+/*.bed
+*.idx
+*~
+/*.vcf
+/*.txt
+/*.csh
+/.*
+/*.pdf
+/*.eval
+*.ipr
+*.iws
+*.iml
+.DS_Store
+queueScatterGather
+/foo*
+/bar*
+integrationtests/
+public/testdata/onTheFlyOutputTest.vcf
diff --git a/build.xml b/build.xml
index 446982a44..6ca959c38 100644
--- a/build.xml
+++ b/build.xml
@@ -28,6 +28,8 @@
+
+
@@ -35,18 +37,25 @@
+
+
+
+
+
+
-
+
-
+
@@ -60,7 +69,7 @@
-
+
@@ -82,7 +91,7 @@
-
+
@@ -113,7 +122,7 @@
-
+
@@ -154,7 +163,7 @@
-
+
@@ -211,11 +220,11 @@
-
+
-
+
@@ -224,11 +233,11 @@
-
+
-
+
@@ -266,7 +275,7 @@
-
+
@@ -312,13 +321,13 @@
-
+
-
+
@@ -327,11 +336,11 @@
-
+
-
@@ -341,9 +350,9 @@
-
+
-
+
@@ -362,14 +371,14 @@
-
+
-
+
-
-
+
@@ -413,9 +422,9 @@
-
+
-
+
@@ -424,12 +433,12 @@
-
+
-
+
@@ -532,6 +541,11 @@
+
+
+
+
+
@@ -539,7 +553,7 @@
-
+
@@ -551,6 +565,12 @@
+
+
+
+
+
+
@@ -579,6 +599,10 @@
+
+
+
+
@@ -593,6 +617,10 @@
+
+
+
+
@@ -605,28 +633,7 @@
-
@@ -643,6 +650,9 @@
+
+
+
@@ -682,20 +692,7 @@
-
+
@@ -780,10 +777,6 @@
-
@@ -800,10 +793,6 @@
-
@@ -851,6 +840,8 @@
+
+
@@ -1187,19 +1178,18 @@
-
-
+
-
+
-
+
diff --git a/public/R/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R
similarity index 100%
rename from public/R/queueJobReport.R
rename to public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R
diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION
similarity index 100%
rename from public/R/src/gsalib/DESCRIPTION
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION
diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.error.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.error.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.error.R
diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.getargs.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.getargs.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.getargs.R
diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.message.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.message.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.message.R
diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.plot.venn.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.plot.venn.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.plot.venn.R
diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.eval.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.read.eval.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.eval.R
diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.read.gatkreport.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R
diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.squidmetrics.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.read.squidmetrics.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.squidmetrics.R
diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.vcf.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.read.vcf.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.vcf.R
diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.warn.R
similarity index 100%
rename from public/R/src/gsalib/R/gsa.warn.R
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.warn.R
diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/Read-and-delete-me
similarity index 100%
rename from public/R/src/gsalib/Read-and-delete-me
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/Read-and-delete-me
diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg
similarity index 100%
rename from public/R/src/gsalib/data/tearsheetdrop.jpg
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg
diff --git a/public/R/src/gsalib/man/gsa.error.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.error.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.error.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.error.Rd
diff --git a/public/R/src/gsalib/man/gsa.getargs.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.getargs.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.getargs.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.getargs.Rd
diff --git a/public/R/src/gsalib/man/gsa.message.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.message.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.message.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.message.Rd
diff --git a/public/R/src/gsalib/man/gsa.plot.venn.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.plot.venn.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.plot.venn.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.plot.venn.Rd
diff --git a/public/R/src/gsalib/man/gsa.read.eval.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.eval.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.read.eval.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.eval.Rd
diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.gatkreport.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.read.gatkreport.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.gatkreport.Rd
diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.squidmetrics.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.read.squidmetrics.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.squidmetrics.Rd
diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.vcf.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.read.vcf.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.vcf.Rd
diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.warn.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsa.warn.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.warn.Rd
diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd
similarity index 100%
rename from public/R/src/gsalib/man/gsalib-package.Rd
rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
index 351583c07..c0823e5c5 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
@@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable {
/**
* Maps indices of command line arguments to values paired with that argument.
*/
- public final SortedMap> indices = new TreeMap>();
+ public final SortedMap> sites = new TreeMap>();
/**
* An ordered, freeform collection of tags.
@@ -72,32 +72,32 @@ public class ArgumentMatch implements Iterable {
}
/**
- * A simple way of indicating that an argument with the given label and definition exists at this index.
+ * A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null.
- * @param index Position of the argument. Must not be null.
+ * @param site Position of the argument. Must not be null.
* @param tags ordered freeform text tags associated with this argument.
*/
- public ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final Tags tags) {
- this( label, definition, index, null, tags );
+ public ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final Tags tags) {
+ this( label, definition, site, null, tags );
}
/**
- * A simple way of indicating that an argument with the given label and definition exists at this index.
+ * A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null.
- * @param index Position of the argument. Must not be null.
+ * @param site Position of the argument. Must not be null.
* @param value Value for the argument at this position.
* @param tags ordered freeform text tags associated with this argument.
*/
- private ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final String value, final Tags tags) {
+ private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) {
this.label = label;
this.definition = definition;
ArrayList values = new ArrayList();
if( value != null )
values.add(value);
- indices.put(index,values );
+ sites.put(site,values );
this.tags = tags;
}
@@ -117,7 +117,7 @@ public class ArgumentMatch implements Iterable {
ArgumentMatch otherArgumentMatch = (ArgumentMatch)other;
return this.definition.equals(otherArgumentMatch.definition) &&
this.label.equals(otherArgumentMatch.label) &&
- this.indices.equals(otherArgumentMatch.indices) &&
+ this.sites.equals(otherArgumentMatch.sites) &&
this.tags.equals(otherArgumentMatch.tags);
}
@@ -129,16 +129,17 @@ public class ArgumentMatch implements Iterable {
* @param key Key which specifies the transform.
* @return A variant of this ArgumentMatch with all keys transformed.
*/
+ @SuppressWarnings("unchecked")
ArgumentMatch transform(Multiplexer multiplexer, Object key) {
- SortedMap> newIndices = new TreeMap>();
- for(Map.Entry> index: indices.entrySet()) {
+ SortedMap> newIndices = new TreeMap>();
+ for(Map.Entry> site: sites.entrySet()) {
List newEntries = new ArrayList();
- for(String entry: index.getValue())
+ for(String entry: site.getValue())
newEntries.add(multiplexer.transformArgument(key,entry));
- newIndices.put(index.getKey(),newEntries);
+ newIndices.put(site.getKey(),newEntries);
}
ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition);
- newArgumentMatch.indices.putAll(newIndices);
+ newArgumentMatch.sites.putAll(newIndices);
return newArgumentMatch;
}
@@ -157,9 +158,9 @@ public class ArgumentMatch implements Iterable {
public Iterator iterator() {
return new Iterator() {
/**
- * Iterate over each the available index.
+ * Iterate over each the available site.
*/
- private Iterator indexIterator = null;
+ private Iterator siteIterator = null;
/**
* Iterate over each available token.
@@ -167,9 +168,9 @@ public class ArgumentMatch implements Iterable {
private Iterator tokenIterator = null;
/**
- * The next index to return. Null if none remain.
+ * The next site to return. Null if none remain.
*/
- Integer nextIndex = null;
+ ArgumentMatchSite nextSite = null;
/**
* The next token to return. Null if none remain.
@@ -177,7 +178,7 @@ public class ArgumentMatch implements Iterable {
String nextToken = null;
{
- indexIterator = indices.keySet().iterator();
+ siteIterator = sites.keySet().iterator();
prepareNext();
}
@@ -186,7 +187,7 @@ public class ArgumentMatch implements Iterable {
* @return True if there's another token waiting in the wings. False otherwise.
*/
public boolean hasNext() {
- return nextToken != null;
+ return nextToken != null;
}
/**
@@ -194,32 +195,32 @@ public class ArgumentMatch implements Iterable {
* @return The next ArgumentMatch in the series. Should never be null.
*/
public ArgumentMatch next() {
- if( nextIndex == null || nextToken == null )
+ if( nextSite == null || nextToken == null )
throw new IllegalStateException( "No more ArgumentMatches are available" );
- ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags );
+ ArgumentMatch match = new ArgumentMatch( label, definition, nextSite, nextToken, tags );
prepareNext();
return match;
}
/**
* Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
- * initialize nextIndex / nextToken to null.
+ * initialize nextSite / nextToken to null.
*/
private void prepareNext() {
if( tokenIterator != null && tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
}
else {
- nextIndex = null;
+ nextSite = null;
nextToken = null;
// Do a nested loop. While more data is present in the inner loop, grab that data.
// Otherwise, troll the outer iterator looking for more data.
- while( indexIterator.hasNext() ) {
- nextIndex = indexIterator.next();
- if( indices.get(nextIndex) != null ) {
- tokenIterator = indices.get(nextIndex).iterator();
+ while( siteIterator.hasNext() ) {
+ nextSite = siteIterator.next();
+ if( sites.get(nextSite) != null ) {
+ tokenIterator = sites.get(nextSite).iterator();
if( tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
break;
@@ -245,29 +246,29 @@ public class ArgumentMatch implements Iterable {
* @param other The other match to merge into.
*/
public void mergeInto( ArgumentMatch other ) {
- indices.putAll(other.indices);
+ sites.putAll(other.sites);
}
/**
* Associate a value with this merge maapping.
- * @param index index of the command-line argument to which this value is mated.
+ * @param site site of the command-line argument to which this value is mated.
* @param value Text representation of value to add.
*/
- public void addValue( int index, String value ) {
- if( !indices.containsKey(index) || indices.get(index) == null )
- indices.put(index, new ArrayList() );
- indices.get(index).add(value);
+ public void addValue( ArgumentMatchSite site, String value ) {
+ if( !sites.containsKey(site) || sites.get(site) == null )
+ sites.put(site, new ArrayList() );
+ sites.get(site).add(value);
}
/**
* Does this argument already have a value at the given site?
* Arguments are only allowed to be single-valued per site, and
* flags aren't allowed a value at all.
- * @param index Index at which to check for values.
+ * @param site Site at which to check for values.
* @return True if the argument has a value at the given site. False otherwise.
*/
- public boolean hasValueAtSite( int index ) {
- return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag();
+ public boolean hasValueAtSite( ArgumentMatchSite site ) {
+ return (sites.get(site) != null && sites.get(site).size() >= 1) || isArgumentFlag();
}
/**
@@ -276,9 +277,9 @@ public class ArgumentMatch implements Iterable {
*/
public List values() {
List values = new ArrayList();
- for( int index: indices.keySet() ) {
- if( indices.get(index) != null )
- values.addAll(indices.get(index));
+ for( ArgumentMatchSite site: sites.keySet() ) {
+ if( sites.get(site) != null )
+ values.addAll(sites.get(site));
}
return values;
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java
new file mode 100644
index 000000000..8a4120101
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+/**
+ * Which source and the index within the source where an argument match was found.
+ */
+public class ArgumentMatchSite implements Comparable {
+ private final ArgumentMatchSource source;
+ private final int index;
+
+ public ArgumentMatchSite(ArgumentMatchSource source, int index) {
+ this.source = source;
+ this.index = index;
+ }
+
+ public ArgumentMatchSource getSource() {
+ return source;
+ }
+
+ public int getIndex() {
+ return index;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ ArgumentMatchSite that = (ArgumentMatchSite) o;
+
+ return (index == that.index) && (source == null ? that.source == null : source.equals(that.source));
+ }
+
+ @Override
+ public int hashCode() {
+ int result = source != null ? source.hashCode() : 0;
+ // Generated by intellij. No other special reason to this implementation. -ks
+ result = 31 * result + index;
+ return result;
+ }
+
+ @Override
+ public int compareTo(ArgumentMatchSite that) {
+ int comp = this.source.compareTo(that.source);
+ if (comp != 0)
+ return comp;
+
+ // Both files are the same.
+ if (this.index == that.index)
+ return 0;
+ return this.index < that.index ? -1 : 1;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java
new file mode 100644
index 000000000..ed2700006
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+import java.io.File;
+
+/**
+ * Where an argument match originated, via the commandline or a file.
+ */
+public class ArgumentMatchSource implements Comparable {
+ public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null);
+
+ private final ArgumentMatchSourceType type;
+ private final File file;
+
+ /**
+ * Creates an argument match source from the specified file.
+ * @param file File specifying the arguments. Must not be null.
+ */
+ public ArgumentMatchSource(File file) {
+ this(ArgumentMatchSourceType.File, file);
+ }
+
+ private ArgumentMatchSource(ArgumentMatchSourceType type, File file) {
+ if (type == ArgumentMatchSourceType.File && file == null)
+ throw new IllegalArgumentException("An argument match source of type File cannot have a null file.");
+ this.type = type;
+ this.file = file;
+ }
+
+ public ArgumentMatchSourceType getType() {
+ return type;
+ }
+
+ public File getFile() {
+ return file;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ ArgumentMatchSource that = (ArgumentMatchSource) o;
+
+ return (type == that.type) && (file == null ? that.file == null : file.equals(that.file));
+ }
+
+ @Override
+ public int hashCode() {
+ int result = type != null ? type.hashCode() : 0;
+ result = 31 * result + (file != null ? file.hashCode() : 0);
+ return result;
+ }
+
+ /**
+ * Compares two sources, putting the command line first, then files.
+ */
+ @Override
+ public int compareTo(ArgumentMatchSource that) {
+ int comp = this.type.compareTo(that.type);
+ if (comp != 0)
+ return comp;
+
+ File f1 = this.file;
+ File f2 = that.file;
+
+ if ((f1 == null) ^ (f2 == null)) {
+ // If one of the files is null and the other is not
+ // put the null file first
+ return f1 == null ? -1 : 1;
+ }
+
+ return f1 == null ? 0 : f1.compareTo(f2);
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java
new file mode 100644
index 000000000..3ff6e21d4
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+/**
+ * Type of where an argument match originated, via the commandline or a file.
+ */
+public enum ArgumentMatchSourceType {
+ CommandLine, File
+}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java
index 52d3b8232..3da28c420 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java
@@ -37,7 +37,7 @@ public class ArgumentMatches implements Iterable {
* Collection matches from argument definition to argument value.
* Package protected access is deliberate.
*/
- Map argumentMatches = new TreeMap();
+ Map argumentMatches = new TreeMap();
/**
* Provide a place to put command-line argument values that don't seem to belong to
@@ -80,7 +80,7 @@ public class ArgumentMatches implements Iterable {
* @param site Site at which to check.
* @return True if the site has a match. False otherwise.
*/
- boolean hasMatch( int site ) {
+ boolean hasMatch( ArgumentMatchSite site ) {
return argumentMatches.containsKey( site );
}
@@ -90,7 +90,7 @@ public class ArgumentMatches implements Iterable {
* @return The match present at the given site.
* @throws IllegalArgumentException if site does not contain a match.
*/
- ArgumentMatch getMatch( int site ) {
+ ArgumentMatch getMatch( ArgumentMatchSite site ) {
if( !argumentMatches.containsKey(site) )
throw new IllegalArgumentException( "Site does not contain an argument: " + site );
return argumentMatches.get(site);
@@ -107,6 +107,7 @@ public class ArgumentMatches implements Iterable {
/**
* Return all argument matches of this source.
+ * @param parsingEngine Parsing engine.
* @param argumentSource Argument source to match.
* @return List of all matches.
*/
@@ -167,6 +168,7 @@ public class ArgumentMatches implements Iterable {
* TODO: Generify this.
* @param multiplexer Multiplexer that controls the transformation process.
* @param key Key which specifies the transform.
+ * @return new argument matches.
*/
ArgumentMatches transform(Multiplexer multiplexer, Object key) {
ArgumentMatches newArgumentMatches = new ArgumentMatches();
@@ -187,15 +189,15 @@ public class ArgumentMatches implements Iterable {
for( ArgumentMatch argumentMatch: getUniqueMatches() ) {
if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) {
argumentMatch.mergeInto( match );
- for( int index: match.indices.keySet() )
- argumentMatches.put( index, argumentMatch );
+ for( ArgumentMatchSite site: match.sites.keySet() )
+ argumentMatches.put( site, argumentMatch );
definitionExists = true;
}
}
if( !definitionExists ) {
- for( int index: match.indices.keySet() )
- argumentMatches.put( index, match );
+ for( ArgumentMatchSite site: match.sites.keySet() )
+ argumentMatches.put( site, match );
}
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
index d88e7030e..bed1e710e 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
@@ -35,10 +35,7 @@ import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter;
import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.EnumSet;
-import java.util.Locale;
+import java.util.*;
public abstract class CommandLineProgram {
@@ -155,6 +152,7 @@ public abstract class CommandLineProgram {
*
* @param clp the command line program to execute
* @param args the command line arguments passed in
+ * @param dryRun dry run
* @throws Exception when an exception occurs
*/
@SuppressWarnings("unchecked")
@@ -176,6 +174,8 @@ public abstract class CommandLineProgram {
ParsingEngine parser = clp.parser = new ParsingEngine(clp);
parser.addArgumentSource(clp.getClass());
+ Map> parsedArgs;
+
// process the args
if (clp.canAddArgumentsDynamically()) {
// if the command-line program can toss in extra args, fetch them and reparse the arguments.
@@ -196,14 +196,14 @@ public abstract class CommandLineProgram {
Class[] argumentSources = clp.getArgumentSources();
for (Class argumentSource : argumentSources)
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
- parser.parse(args);
+ parsedArgs = parser.parse(args);
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
if ( ! dryRun ) parser.validate();
} else {
- parser.parse(args);
+ parsedArgs = parser.parse(args);
if ( ! dryRun ) {
if (isHelpPresent(parser))
@@ -230,7 +230,7 @@ public abstract class CommandLineProgram {
}
// regardless of what happens next, generate the header information
- HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
+ HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), parsedArgs);
// call the execute
CommandLineProgram.result = clp.execute();
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
index ad58553c1..0fac195e1 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
@@ -26,6 +26,7 @@
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
+import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter;
+import java.io.File;
+import java.io.IOException;
import java.lang.reflect.Field;
import java.util.*;
@@ -101,6 +104,8 @@ public class ParsingEngine {
if(clp != null)
argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors());
argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS);
+
+ addArgumentSource(ParsingEngineArgumentFiles.class);
}
/**
@@ -149,21 +154,43 @@ public class ParsingEngine {
* command-line arguments to the arguments that are actually
* required.
* @param tokens Tokens passed on the command line.
+ * @return The parsed arguments by file.
*/
- public void parse( String[] tokens ) {
+ public SortedMap> parse( String[] tokens ) {
argumentMatches = new ArgumentMatches();
+ SortedMap> parsedArgs = new TreeMap>();
- int lastArgumentMatchSite = -1;
+ List cmdLineTokens = Arrays.asList(tokens);
+ parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs);
- for( int i = 0; i < tokens.length; i++ ) {
- String token = tokens[i];
+ ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles();
+
+ // Load the arguments ONLY into the argument files.
+ // Validation may optionally run on the rest of the arguments.
+ loadArgumentsIntoObject(argumentFiles);
+
+ for (File file: argumentFiles.files) {
+ List fileTokens = getArguments(file);
+ parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs);
+ }
+
+ return parsedArgs;
+ }
+
+ private void parse(ArgumentMatchSource matchSource, List tokens,
+ ArgumentMatches argumentMatches, SortedMap> parsedArgs) {
+ ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1);
+
+ int i = 0;
+ for (String token: tokens) {
// If the token is of argument form, parse it into its own argument match.
// Otherwise, pair it with the most recently used argument discovered.
+ ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i);
if( isArgumentForm(token) ) {
- ArgumentMatch argumentMatch = parseArgument( token, i );
+ ArgumentMatch argumentMatch = parseArgument( token, site );
if( argumentMatch != null ) {
argumentMatches.mergeInto( argumentMatch );
- lastArgumentMatchSite = i;
+ lastArgumentMatchSite = site;
}
}
else {
@@ -171,10 +198,31 @@ public class ParsingEngine {
!argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite))
argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token );
else
- argumentMatches.MissingArgument.addValue( i, token );
+ argumentMatches.MissingArgument.addValue( site, token );
}
+ i++;
}
+
+ parsedArgs.put(matchSource, tokens);
+ }
+
+ private List getArguments(File file) {
+ try {
+ if (file.getAbsolutePath().endsWith(".list")) {
+ return getListArguments(file);
+ }
+ } catch (IOException e) {
+ throw new UserException.CouldNotReadInputFile(file, e);
+ }
+ throw new UserException.CouldNotReadInputFile(file, "file extension is not .list");
+ }
+
+ private List getListArguments(File file) throws IOException {
+ ArrayList argsList = new ArrayList();
+ for (String line: FileUtils.readLines(file))
+ argsList.addAll(Arrays.asList(Utils.escapeExpressions(line)));
+ return argsList;
}
public enum ValidationType { MissingRequiredArgument,
@@ -495,7 +543,7 @@ public class ParsingEngine {
* @param position The position of the token in question.
* @return ArgumentMatch associated with this token, or null if no match exists.
*/
- private ArgumentMatch parseArgument( String token, int position ) {
+ private ArgumentMatch parseArgument( String token, ArgumentMatchSite position ) {
if( !isArgumentForm(token) )
throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token );
@@ -580,9 +628,21 @@ class UnmatchedArgumentException extends ArgumentException {
private static String formatArguments( ArgumentMatch invalidValues ) {
StringBuilder sb = new StringBuilder();
- for( int index: invalidValues.indices.keySet() )
- for( String value: invalidValues.indices.get(index) ) {
- sb.append( String.format("%nInvalid argument value '%s' at position %d.", value, index) );
+ for( ArgumentMatchSite site: invalidValues.sites.keySet() )
+ for( String value: invalidValues.sites.get(site) ) {
+ switch (site.getSource().getType()) {
+ case CommandLine:
+ sb.append( String.format("%nInvalid argument value '%s' at position %d.",
+ value, site.getIndex()) );
+ break;
+ case File:
+ sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.",
+ value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) );
+ break;
+ default:
+ throw new RuntimeException( String.format("Unexpected argument match source type: %s",
+ site.getSource().getType()));
+ }
if(value != null && Utils.dupString(' ',value.length()).equals(value))
sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace.");
}
@@ -635,4 +695,13 @@ class UnknownEnumeratedValueException extends ArgumentException {
private static String formatArguments(ArgumentDefinition definition, String argumentPassed) {
return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions));
}
-}
\ No newline at end of file
+}
+
+/**
+ * Container class to store the list of argument files.
+ * The files will be parsed after the command line arguments.
+ */
+class ParsingEngineArgumentFiles {
+ @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false)
+ public List files = new ArrayList();
+}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
index a070cb5a1..452309e89 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
@@ -68,7 +68,7 @@ public abstract class ParsingMethod {
* @return An argument match. Definition field will be populated if a match was found or
* empty if no appropriate definition could be found.
*/
- public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) {
+ public ArgumentMatch match( ArgumentDefinitions definitions, String token, ArgumentMatchSite position ) {
// If the argument is valid, parse out the argument.
Matcher matcher = pattern.matcher(token);
@@ -102,9 +102,7 @@ public abstract class ParsingMethod {
// Try to find a matching argument. If found, label that as the match. If not found, add the argument
// with a null definition.
- ArgumentMatch argumentMatch = new ArgumentMatch(argument,argumentDefinition,position,tags);
-
- return argumentMatch;
+ return new ArgumentMatch(argument,argumentDefinition,position,tags);
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 2b6c280c8..20efc3173 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -221,12 +221,12 @@ public class GenomeAnalysisEngine {
ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
// execute the microscheduler, storing the results
- Object result = microScheduler.execute(this.walker, shardStrategy);
+ return microScheduler.execute(this.walker, shardStrategy);
//monitor.stop();
//logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed()));
- return result;
+ //return result;
}
/**
@@ -301,6 +301,10 @@ public class GenomeAnalysisEngine {
return method;
}
+ protected void setDownsamplingMethod(DownsamplingMethod method) {
+ argCollection.setDownsamplingMethod(method);
+ }
+
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
@@ -390,7 +394,9 @@ public class GenomeAnalysisEngine {
/**
* Get the sharding strategy given a driving data source.
*
+ * @param readsDataSource readsDataSource
* @param drivingDataSource Data on which to shard.
+ * @param intervals intervals
* @return the sharding strategy
*/
protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
@@ -427,7 +433,7 @@ public class GenomeAnalysisEngine {
return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region);
}
- ShardStrategy shardStrategy = null;
+ ShardStrategy shardStrategy;
ShardStrategyFactory.SHATTER_STRATEGY shardType;
long SHARD_SIZE = 100000L;
@@ -436,6 +442,8 @@ public class GenomeAnalysisEngine {
if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
if (intervals != null && !intervals.isEmpty()) {
+ if (readsDataSource == null)
+ throw new IllegalArgumentException("readsDataSource is null");
if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
@@ -499,7 +507,8 @@ public class GenomeAnalysisEngine {
*/
private void initializeTempDirectory() {
File tempDir = new File(System.getProperty("java.io.tmpdir"));
- tempDir.mkdirs();
+ if (!tempDir.exists() && !tempDir.mkdirs())
+ throw new UserException.BadTmpDir("Unable to create directory");
}
/**
@@ -707,6 +716,7 @@ public class GenomeAnalysisEngine {
* @param reads Reads data source.
* @param reference Reference data source.
* @param rods a collection of the reference ordered data tracks
+ * @param manager manager
*/
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection rods, RMDTrackBuilder manager) {
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
@@ -735,15 +745,22 @@ public class GenomeAnalysisEngine {
/**
* Gets a data source for the given set of reads.
*
+ * @param argCollection arguments
+ * @param genomeLocParser parser
+ * @param refReader reader
* @return A data source for the given set of reads.
*/
private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
DownsamplingMethod method = getDownsamplingMethod();
+ // Synchronize the method back into the collection so that it shows up when
+ // interrogating for the downsample method during command line recreation.
+ setDownsamplingMethod(method);
+
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
- SAMDataSource dataSource = new SAMDataSource(
+ return new SAMDataSource(
samReaderIDs,
genomeLocParser,
argCollection.useOriginalBaseQualities,
@@ -759,14 +776,12 @@ public class GenomeAnalysisEngine {
refReader,
argCollection.defaultBaseQualities,
!argCollection.disableLowMemorySharding);
- return dataSource;
}
/**
* Opens a reference sequence file paired with an index. Only public for testing purposes
*
* @param refFile Handle to a reference sequence file. Non-null.
- * @return A thread-safe file wrapper.
*/
public void setReferenceDataSource(File refFile) {
this.referenceDataSource = new ReferenceDataSource(refFile);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 70819a092..4d7e4e244 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -138,8 +138,8 @@ public class GATKArgumentCollection {
/**
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
- * a default downsampling mechanism, return null.
- * @return The explicitly specified downsampling mechanism, or null if none exists.
+ * a default downsampling mechanism, return the default.
+ * @return The explicitly specified downsampling mechanism, or the default if none exists.
*/
public DownsamplingMethod getDownsamplingMethod() {
if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null)
@@ -149,6 +149,18 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
}
+ /**
+ * Set the downsampling method stored in the argument collection so that it is read back out when interrogating the command line arguments.
+ * @param method The downsampling mechanism.
+ */
+ public void setDownsamplingMethod(DownsamplingMethod method) {
+ if (method == null)
+ throw new IllegalArgumentException("method is null");
+ downsamplingType = method.type;
+ downsampleCoverage = method.toCoverage;
+ downsampleFraction = method.toFraction;
+ }
+
// --------------------------------------------------------------------------------------------------------------
//
// BAQ arguments
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 74d39ecb0..8452aadfd 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File;
import java.lang.reflect.InvocationTargetException;
@@ -57,6 +58,8 @@ import java.util.*;
* Converts shards to SAM iterators over the specified region
*/
public class SAMDataSource {
+ final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
+
/** Backing support for reads. */
protected final ReadProperties readProperties;
@@ -644,7 +647,9 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
byte defaultBaseQualities) {
- wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
+ if ( useOriginalBaseQualities || defaultBaseQualities >= 0 )
+ // only wrap if we are replacing the original qualitiies or using a default base quality
+ wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end of throwing away
@@ -756,6 +761,7 @@ public class SAMDataSource {
public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile);
+ reader.setSAMRecordFactory(factory);
reader.enableFileSource(true);
reader.enableIndexMemoryMapping(false);
if(!enableLowMemorySharding)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java
index 2f30d12a8..9a89d2086 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* An iterator which does post-processing of a read, including potentially wrapping
@@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists.
*/
public SAMRecord next() {
- return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities);
+ SAMRecord rec = wrappedIterator.next();
+
+ // if we are using default quals, check if we need them, and add if necessary.
+ // 1. we need if reads are lacking or have incomplete quality scores
+ // 2. we add if defaultBaseQualities has a positive value
+ if (defaultBaseQualities >= 0) {
+ byte reads [] = rec.getReadBases();
+ byte quals [] = rec.getBaseQualities();
+ if (quals == null || quals.length < reads.length) {
+ byte new_quals [] = new byte [reads.length];
+ for (int i=0; i GLs, List Alleles,
+ protected abstract void getLog10PNonRef(Map GLs, List Alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
index f87eae781..1c2d82ab7 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@@ -51,9 +51,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
super(UAC, N, logger, verboseWriter);
}
- public void getLog10PNonRef(RefMetaDataTracker tracker,
- ReferenceContext ref,
- Map GLs, List alleles,
+ public void getLog10PNonRef(Map GLs, List alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
final int numAlleles = alleles.size();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java
index f4195e5f0..27842a8bf 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java
@@ -52,9 +52,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
AFMatrix = new AlleleFrequencyMatrix(N);
}
- protected void getLog10PNonRef(RefMetaDataTracker tracker,
- ReferenceContext ref,
- Map GLs, List alleles,
+ protected void getLog10PNonRef(Map GLs, List alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
initializeAFMatrix(GLs);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index 58b33924b..aea63b61d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -30,12 +30,10 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
-import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java
index 3652763de..4f378b24a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java
@@ -4,6 +4,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.ArrayList;
+import java.util.List;
/**
* Created by IntelliJ IDEA.
@@ -15,11 +16,11 @@ import java.util.ArrayList;
public class MultiallelicGenotypeLikelihoods {
private String sample;
private double[] GLs;
- private ArrayList alleleList;
+ private List alleleList;
private int depth;
public MultiallelicGenotypeLikelihoods(String sample,
- ArrayList A,
+ List A,
double[] log10Likelihoods, int depth) {
/* Check for consistency between likelihood vector and number of alleles */
int numAlleles = A.size();
@@ -40,7 +41,7 @@ public class MultiallelicGenotypeLikelihoods {
return GLs;
}
- public ArrayList getAlleles() {
+ public List getAlleles() {
return alleleList;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index b72b68f9f..9f0585d13 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -325,7 +325,7 @@ public class UnifiedGenotyperEngine {
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(log10AlleleFrequencyPosteriors.get());
- afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
+ afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
// find the most likely frequency
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
@@ -383,7 +383,7 @@ public class UnifiedGenotyperEngine {
// the overall lod
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyPosteriors.get());
- afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
+ afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
@@ -391,7 +391,7 @@ public class UnifiedGenotyperEngine {
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyPosteriors.get());
- afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
+ afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
@@ -400,7 +400,7 @@ public class UnifiedGenotyperEngine {
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyPosteriors.get());
- afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
+ afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
@@ -447,6 +447,78 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
+ // A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs
+ public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) {
+
+ // initialize the data for this thread if that hasn't been done yet
+ if ( afcm.get() == null ) {
+ log10AlleleFrequencyPosteriors.set(new double[N+1]);
+ afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
+ }
+
+ // estimate our confidence in a reference call and return
+ if ( vc.getNSamples() == 0 )
+ return null;
+
+ // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
+ clearAFarray(log10AlleleFrequencyPosteriors.get());
+ afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
+
+ // find the most likely frequency
+ int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
+
+ // calculate p(f>0)
+ double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
+ double sum = 0.0;
+ for (int i = 1; i <= N; i++)
+ sum += normalizedPosteriors[i];
+ double PofF = Math.min(sum, 1.0); // deal with precision errors
+
+ double phredScaledConfidence;
+ if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
+ phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
+ if ( Double.isInfinite(phredScaledConfidence) )
+ phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
+ } else {
+ phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
+ if ( Double.isInfinite(phredScaledConfidence) ) {
+ sum = 0.0;
+ for (int i = 1; i <= N; i++) {
+ if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
+ break;
+ sum += log10AlleleFrequencyPosteriors.get()[i];
+ }
+ phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
+ }
+ }
+
+ // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
+ if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
+ // technically, at this point our confidence in a reference call isn't accurately estimated
+ // because it didn't take into account samples with no data, so let's get a better estimate
+ return null;
+ }
+
+ // create the genotypes
+ Map genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
+
+ // *** note that calculating strand bias involves overwriting data structures, so we do that last
+ HashMap attributes = new HashMap();
+
+ int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
+
+ Set myAlleles = new HashSet(vc.getAlleles());
+ // strip out the alternate allele if it's a ref call
+ if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
+ myAlleles = new HashSet(1);
+ myAlleles.add(vc.getReference());
+ }
+ VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
+ myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes, vc.getReferenceBaseForIndel());
+
+ return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
+ }
+
private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) {
// TODO - temp fix until we can deal with extended events properly
// for indels, stop location is one more than ref allele length
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
index 9edf5b5d4..0df7b7cbd 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@@ -32,11 +32,17 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.PrintStream;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
@@ -45,9 +51,6 @@ import java.util.LinkedHashMap;
public class PairHMMIndelErrorModel {
public static final int BASE_QUAL_THRESHOLD = 20;
- private final double logGapOpenProbability;
- private final double logGapContinuationProbability;
-
private boolean DEBUG = false;
private boolean bandedLikelihoods = false;
@@ -89,8 +92,6 @@ public class PairHMMIndelErrorModel {
}
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean bandedLikelihoods) {
- this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
- this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
this.DEBUG = deb;
this.bandedLikelihoods = bandedLikelihoods;
@@ -98,13 +99,14 @@ public class PairHMMIndelErrorModel {
this.GAP_CONT_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
this.GAP_OPEN_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
+ double gop = -indelGOP/10.0;
+ double gcp = -indelGCP/10.0;
+
for (int i = 0; i < START_HRUN_GAP_IDX; i++) {
- GAP_OPEN_PROB_TABLE[i] = logGapOpenProbability;
- GAP_CONT_PROB_TABLE[i] = logGapContinuationProbability;
+ GAP_OPEN_PROB_TABLE[i] = gop;
+ GAP_CONT_PROB_TABLE[i] = gcp;
}
- double gop = logGapOpenProbability;
- double gcp = logGapContinuationProbability;
double step = GAP_PENALTY_HRUN_STEP/10.0;
double maxGOP = -MIN_GAP_OPEN_PENALTY/10.0; // phred to log prob
@@ -185,60 +187,57 @@ public class PairHMMIndelErrorModel {
}
private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
- double[] currentGOP, double[] currentGCP, int eventLength) {
+ double[] currentGOP, double[] currentGCP, int indToStart,
+ double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
final int X_METRIC_LENGTH = readBases.length+1;
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
- // initialize path metric and traceback memories for likelihood computation
- final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+ if (indToStart == 0) {
+ // default initialization for all arrays
-
- final double DIAG_TOL = 20; // means that max - min element in diags have to be > this number for banding to take effect.
- // default initialization for all arrays
- for (int i=0; i < X_METRIC_LENGTH; i++) {
- Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY);
- Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY);
- Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY);
- }
-
- for (int i=1; i < X_METRIC_LENGTH; i++) {
- //initialize first column
- XMetricArray[i][0] = END_GAP_COST*(i);
- }
-
- for (int j=1; j < Y_METRIC_LENGTH; j++) {
- // initialize first row
- YMetricArray[0][j] = END_GAP_COST*(j);
- }
- matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
- XMetricArray[0][0]= YMetricArray[0][0] = 0;
-
-
- final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1;
- final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH);
-
- int idxWithMaxElement = 0;
-
- double maxElementInDiag = Double.NEGATIVE_INFINITY;
-
- for (int diag=0; diag < numDiags; diag++) {
- // compute default I and J start positions at edge of diagonals
- int indI = 0;
- int indJ = diag;
- if (diag >= Y_METRIC_LENGTH ) {
- indI = diag-(Y_METRIC_LENGTH-1);
- indJ = Y_METRIC_LENGTH-1;
+ for (int i=0; i < X_METRIC_LENGTH; i++) {
+ Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY);
+ Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY);
+ Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY);
}
- // first pass: from max element to edge
- int idxLow = bandedLikelihoods? idxWithMaxElement : 0;
+ for (int i=1; i < X_METRIC_LENGTH; i++) {
+ //initialize first column
+ XMetricArray[i][0] = END_GAP_COST*(i);
+ }
- // reset diag max value before starting
- if (bandedLikelihoods) {
- maxElementInDiag = Double.NEGATIVE_INFINITY;
+ for (int j=1; j < Y_METRIC_LENGTH; j++) {
+ // initialize first row
+ YMetricArray[0][j] = END_GAP_COST*(j);
+ }
+ matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
+ XMetricArray[0][0]= YMetricArray[0][0] = 0;
+ }
+
+
+ if (bandedLikelihoods) {
+ final double DIAG_TOL = 20; // means that max - min element in diags have to be > this number for banding to take effect.
+
+ final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1;
+ final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH);
+
+ int idxWithMaxElement = 0;
+
+ for (int diag=indToStart; diag < numDiags; diag++) {
+ // compute default I and J start positions at edge of diagonals
+ int indI = 0;
+ int indJ = diag;
+ if (diag >= Y_METRIC_LENGTH ) {
+ indI = diag-(Y_METRIC_LENGTH-1);
+ indJ = Y_METRIC_LENGTH-1;
+ }
+
+ // first pass: from max element to edge
+ int idxLow = idxWithMaxElement;
+
+ // reset diag max value before starting
+ double maxElementInDiag = Double.NEGATIVE_INFINITY;
// set indI, indJ to correct values
indI += idxLow;
indJ -= idxLow;
@@ -248,46 +247,10 @@ public class PairHMMIndelErrorModel {
indJ++;
}
- }
-
- for (int el = idxLow; el < elemsInDiag; el++) {
- updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
- currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
- // update max in diagonal
- if (bandedLikelihoods) {
- final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
-
- // check if we've fallen off diagonal value by threshold
- if (bestMetric > maxElementInDiag) {
- maxElementInDiag = bestMetric;
- idxWithMaxElement = el;
- }
- else if (bestMetric < maxElementInDiag - DIAG_TOL)
- break; // done w/current diagonal
- }
-
- indI++;
- if (indI >=X_METRIC_LENGTH )
- break;
- indJ--;
- if (indJ <= 0)
- break;
- }
- if (bandedLikelihoods && idxLow > 0) {
- // now do second part in opposite direction
- indI = 0;
- indJ = diag;
- if (diag >= Y_METRIC_LENGTH ) {
- indI = diag-(Y_METRIC_LENGTH-1);
- indJ = Y_METRIC_LENGTH-1;
- }
-
- indI += idxLow-1;
- indJ -= idxLow-1;
- for (int el = idxLow-1; el >= 0; el--) {
+ for (int el = idxLow; el < elemsInDiag; el++) {
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
- currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
+ currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
// update max in diagonal
final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
@@ -296,34 +259,81 @@ public class PairHMMIndelErrorModel {
maxElementInDiag = bestMetric;
idxWithMaxElement = el;
}
- else if (bestMetric < maxElementInDiag - DIAG_TOL)
+ else if (bestMetric < maxElementInDiag - DIAG_TOL && idxWithMaxElement > 0)
break; // done w/current diagonal
- indJ++;
- if (indJ >= Y_METRIC_LENGTH )
+ indI++;
+ if (indI >=X_METRIC_LENGTH )
break;
- indI--;
- if (indI <= 0)
+ indJ--;
+ if (indJ <= 0)
break;
}
+ if (idxLow > 0) {
+ // now do second part in opposite direction
+ indI = 0;
+ indJ = diag;
+ if (diag >= Y_METRIC_LENGTH ) {
+ indI = diag-(Y_METRIC_LENGTH-1);
+ indJ = Y_METRIC_LENGTH-1;
+ }
+
+ indI += idxLow-1;
+ indJ -= idxLow-1;
+ for (int el = idxLow-1; el >= 0; el--) {
+
+ updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
+ currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
+ // update max in diagonal
+ final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
+
+ // check if we've fallen off diagonal value by threshold
+ if (bestMetric > maxElementInDiag) {
+ maxElementInDiag = bestMetric;
+ idxWithMaxElement = el;
+ }
+ else if (bestMetric < maxElementInDiag - DIAG_TOL)
+ break; // done w/current diagonal
+
+ indJ++;
+ if (indJ >= Y_METRIC_LENGTH )
+ break;
+ indI--;
+ if (indI <= 0)
+ break;
+ }
+ }
+ // if (DEBUG)
+ // System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement);
}
- // if (DEBUG)
- // System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement);
}
+ else {
+ // simplified rectangular version of update loop
+ for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
+ for (int indJ=indToStart+1; indJ < Y_METRIC_LENGTH; indJ++) {
+ updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
+ currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
+
+ }
+ }
+ }
+
+
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
XMetricArray[bestI][bestJ],
YMetricArray[bestI][bestJ]);
- /*
+
+ /*
if (DEBUG) {
PrintStream outx, outy, outm, outs;
double[][] sumMetrics = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
try {
- outx = new PrintStream("../../UGOptim/datax.txt");
- outy = new PrintStream("../../UGOptim/datay.txt");
- outm = new PrintStream("../../UGOptim/datam.txt");
- outs = new PrintStream("../../UGOptim/datas.txt");
+ outx = new PrintStream("datax.txt");
+ outy = new PrintStream("datay.txt");
+ outm = new PrintStream("datam.txt");
+ outs = new PrintStream("datas.txt");
double metrics[] = new double[3];
for (int indI=0; indI < X_METRIC_LENGTH; indI++) {
for (int indJ=0; indJ < Y_METRIC_LENGTH; indJ++) {
@@ -393,7 +403,7 @@ public class PairHMMIndelErrorModel {
for (PileupElement p: pileup) {
// > 1 when the read is a consensus read representing multiple independent observations
- final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
+ final boolean isReduced = p.isReducedRead();
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
@@ -414,8 +424,6 @@ public class PairHMMIndelErrorModel {
continue;
}
- double[] recalQuals = null;
-
// get bases of candidate haplotypes that overlap with reads
final int trailingBases = 3;
@@ -534,6 +542,12 @@ public class PairHMMIndelErrorModel {
unclippedReadBases.length-numEndClippedBases);
int j=0;
+
+ // initialize path metric and traceback memories for likelihood computation
+ double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null;
+ byte[] previousHaplotypeSeen = null;
+ double[] previousGOP = null;
+ int startIdx;
for (Allele a: haplotypeMap.keySet()) {
@@ -551,11 +565,37 @@ public class PairHMMIndelErrorModel {
byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(),
(int)indStart, (int)indStop);
+ double readLikelihood;
+ if (matchMetricArray == null) {
+ final int X_METRIC_LENGTH = readBases.length+1;
+ final int Y_METRIC_LENGTH = haplotypeBases.length+1;
+
+ matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+ XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+ YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+ }
final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
- final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
- currentContextGOP, currentContextGCP, eventLength);
+ if (previousHaplotypeSeen == null)
+ startIdx = 0;
+ else {
+ int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
+ int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP);
+ startIdx = Math.min(s1,s2);
+ }
+ previousHaplotypeSeen = haplotypeBases.clone();
+ previousGOP = currentContextGOP.clone();
+
+
+ readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
+ currentContextGOP, currentContextGCP, startIdx, matchMetricArray, XMetricArray, YMetricArray);
+ if (DEBUG) {
+ System.out.println("H:"+new String(haplotypeBases));
+ System.out.println("R:"+new String(readBases));
+ System.out.format("L:%4.2f\n",readLikelihood);
+ System.out.format("StPos:%d\n", startIdx);
+ }
readEl.put(a,readLikelihood);
readLikelihoods[readIdx][j++] = readLikelihood;
}
@@ -579,6 +619,28 @@ public class PairHMMIndelErrorModel {
return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
}
+ private int computeFirstDifferingPosition(byte[] b1, byte[] b2) {
+ if (b1.length != b2.length)
+ return 0; // sanity check
+
+ for (int i=0; i < b1.length; i++ ){
+ if ( b1[i]!= b2[i])
+ return i;
+ }
+ return 0; // sanity check
+ }
+
+ private int computeFirstDifferingPosition(double[] b1, double[] b2) {
+ if (b1.length != b2.length)
+ return 0; // sanity check
+
+ for (int i=0; i < b1.length; i++ ){
+ if ( b1[i]!= b2[i])
+ return i;
+ }
+ return 0; // sanity check
+ }
+
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
index e117454f9..e10334a77 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
@@ -2,9 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
+import java.util.EnumSet;
import java.util.List;
/*
@@ -46,6 +49,9 @@ import java.util.List;
*/
public class CycleCovariate implements StandardCovariate {
+ private final static EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
+ private final static EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
+
// Initialize any member variables using the command-line arguments passed to the walkers
public void initialize( final RecalibrationArgumentCollection RAC ) {
if( RAC.DEFAULT_PLATFORM != null ) {
@@ -58,122 +64,6 @@ public class CycleCovariate implements StandardCovariate {
}
}
- /*
- // Used to pick out the covariate's value from attributes of the read
- public final Comparable getValue( final SAMRecord read, final int offset ) {
-
- int cycle = 1;
-
- //-----------------------------
- // ILLUMINA and SOLID
- //-----------------------------
-
- if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX"
- read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID"
- cycle = offset + 1;
- if( read.getReadNegativeStrandFlag() ) {
- cycle = read.getReadLength() - offset;
- }
- }
-
- //-----------------------------
- // 454
- //-----------------------------
-
- else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454"
- final byte[] bases = read.getReadBases();
-
- // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
- // For example, AAAAAAA was probably read in two flow cycles but here we count it as one
- if( !read.getReadNegativeStrandFlag() ) { // Forward direction
- int iii = 0;
- while( iii <= offset )
- {
- while( iii <= offset && bases[iii] == (byte)'T' ) { iii++; }
- while( iii <= offset && bases[iii] == (byte)'A' ) { iii++; }
- while( iii <= offset && bases[iii] == (byte)'C' ) { iii++; }
- while( iii <= offset && bases[iii] == (byte)'G' ) { iii++; }
- if( iii <= offset ) { cycle++; }
- if( iii <= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii++; }
-
- }
- } else { // Negative direction
- int iii = bases.length-1;
- while( iii >= offset )
- {
- while( iii >= offset && bases[iii] == (byte)'T' ) { iii--; }
- while( iii >= offset && bases[iii] == (byte)'A' ) { iii--; }
- while( iii >= offset && bases[iii] == (byte)'C' ) { iii--; }
- while( iii >= offset && bases[iii] == (byte)'G' ) { iii--; }
- if( iii >= offset ) { cycle++; }
- if( iii >= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii--; }
- }
- }
- }
-
- //-----------------------------
- // SOLID (unused), only to be used in conjunction with PrimerRoundCovariate
- //-----------------------------
-
- //else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
- // // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
- // int pos = offset + 1;
- // if( read.getReadNegativeStrandFlag() ) {
- // pos = read.getReadLength() - offset;
- // }
- // cycle = pos / 5; // integer division
- //}
-
- //-----------------------------
- // UNRECOGNIZED PLATFORM
- //-----------------------------
-
- else { // Platform is unrecognized so revert to the default platform but warn the user first
- if( defaultPlatform != null) { // The user set a default platform
- if( !warnedUserBadPlatform ) {
- Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
- "Defaulting to platform = " + defaultPlatform + "." );
- }
- warnedUserBadPlatform = true;
-
- read.getReadGroup().setPlatform( defaultPlatform );
- return getValue( read, offset ); // A recursive call
- } else { // The user did not set a default platform
- throw new StingException( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
- "No default platform specified. Users must set the default platform using the --default_platform argument." );
- }
- }
-
- // Differentiate between first and second of pair.
- // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
- // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
- // Therefore the cycle covariate must differentiate between first and second of pair reads.
- // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
- // the current sequential model would consider the effects independently instead of jointly.
- if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
- cycle *= -1;
- }
-
- return cycle;
- }
- */
-
- // todo -- this should be put into a common place in the code base
- private static List ILLUMINA_NAMES = Arrays.asList("ILLUMINA", "SLX", "SOLEXA");
- private static List SOLID_NAMES = Arrays.asList("SOLID");
- private static List LS454_NAMES = Arrays.asList("454");
- private static List COMPLETE_GENOMICS_NAMES = Arrays.asList("COMPLETE");
- private static List PACBIO_NAMES = Arrays.asList("PACBIO");
- private static List ION_TORRENT_NAMES = Arrays.asList("IONTORRENT");
-
- private static boolean isPlatform(SAMRecord read, List names) {
- String pl = read.getReadGroup().getPlatform().toUpperCase();
- for ( String name : names )
- if ( pl.contains( name ) )
- return true;
- return false;
- }
-
// Used to pick out the covariate's value from attributes of the read
public void getValues(SAMRecord read, Comparable[] comparable) {
@@ -181,7 +71,8 @@ public class CycleCovariate implements StandardCovariate {
// Illumina, Solid, PacBio, and Complete Genomics
//-----------------------------
- if( isPlatform(read, ILLUMINA_NAMES) || isPlatform(read, SOLID_NAMES) || isPlatform(read, PACBIO_NAMES) || isPlatform(read, COMPLETE_GENOMICS_NAMES) ) {
+ final NGSPlatform ngsPlatform = ((GATKSAMRecord)read).getNGSPlatform();
+ if( DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform) ) {
final int init;
final int increment;
if( !read.getReadNegativeStrandFlag() ) {
@@ -227,8 +118,7 @@ public class CycleCovariate implements StandardCovariate {
//-----------------------------
// 454 and Ion Torrent
//-----------------------------
-
- else if ( isPlatform(read, LS454_NAMES) || isPlatform(read, ION_TORRENT_NAMES)) { // Some bams have "LS454" and others have just "454"
+ else if( FLOW_CYCLE_PLATFORMS.contains(ngsPlatform) ) {
final int readLength = read.getReadLength();
final byte[] bases = read.getReadBases();
@@ -273,8 +163,6 @@ public class CycleCovariate implements StandardCovariate {
else {
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
}
-
-
}
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index 2daa8c025..a0c928afa 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
@@ -228,8 +229,7 @@ public class RecalDataManager {
* @param RAC The list of shared command line arguments
*/
public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
-
- SAMReadGroupRecord readGroup = read.getReadGroup();
+ GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord)read).getReadGroup();
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
if( readGroup == null ) {
@@ -241,7 +241,7 @@ public class RecalDataManager {
warnUserNullReadGroup = true;
}
// There is no readGroup so defaulting to these values
- readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
+ readGroup = new GATKSAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
((GATKSAMRecord)read).setReadGroup( readGroup );
} else {
@@ -251,7 +251,7 @@ public class RecalDataManager {
if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user
final String oldPlatform = readGroup.getPlatform();
- readGroup = new SAMReadGroupRecord( RAC.FORCE_READ_GROUP );
+ readGroup = new GATKSAMReadGroupRecord( RAC.FORCE_READ_GROUP );
readGroup.setPlatform( oldPlatform );
((GATKSAMRecord)read).setReadGroup( readGroup );
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java
new file mode 100644
index 000000000..1b9513b9a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java
@@ -0,0 +1,52 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
+
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Stratifies the eval RODs by the indel size
+ *
+ * Indel sizes are stratified from sizes -100 to +100. Sizes greater than this are lumped in the +/- 100 bin
+ * This stratification ignores multi-allelic indels (whose size is not defined uniquely)
+ */
+public class IndelSize extends VariantStratifier {
+ static final int MAX_INDEL_SIZE = 100;
+ @Override
+ public void initialize() {
+ states = new ArrayList();
+ for( int a=-MAX_INDEL_SIZE; a <=MAX_INDEL_SIZE; a++ ) {
+ states.add(String.format("%d", a));
+ }
+ }
+
+ public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
+ ArrayList relevantStates = new ArrayList();
+
+ if (eval != null && eval.isIndel() && eval.isBiallelic()) {
+ try {
+ int eventLength = 0;
+ if ( eval.isSimpleInsertion() ) {
+ eventLength = eval.getAlternateAllele(0).length();
+ } else if ( eval.isSimpleDeletion() ) {
+ eventLength = -eval.getReference().length();
+ }
+
+ if (eventLength > MAX_INDEL_SIZE)
+ eventLength = MAX_INDEL_SIZE;
+ else if (eventLength < -MAX_INDEL_SIZE)
+ eventLength = -MAX_INDEL_SIZE;
+
+ relevantStates.add(String.format("%d",eventLength));
+ } catch (Exception e) {
+ return relevantStates;
+ }
+ }
+
+ return relevantStates;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java
new file mode 100644
index 000000000..4f01f2b7a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils;
+
+import net.sf.samtools.SAMReadGroupRecord;
+import net.sf.samtools.SAMRecord;
+
+/**
+ * A canonical, master list of the standard NGS platforms. These values
+ * can be obtained (efficiently) from a GATKSAMRecord object with the
+ * getNGSPlatform method.
+ *
+ * @author Mark DePristo
+ * @since 2011
+ */
+public enum NGSPlatform {
+ ILLUMINA("ILLUMINA", "SLX", "SOLEXA"),
+ SOLID("SOLID"),
+ LS454("454"),
+ COMPLETE_GENOMICS("COMPLETE"),
+ PACBIO("PACBIO"),
+ ION_TORRENT("IONTORRENT"),
+ UNKNOWN("UNKNOWN");
+
+ /**
+ * Array of the prefix names in a BAM file for each of the platforms.
+ */
+ private final String[] BAM_PL_NAMES;
+
+ NGSPlatform(final String... BAM_PL_NAMES) {
+ for ( int i = 0; i < BAM_PL_NAMES.length; i++ )
+ BAM_PL_NAMES[i] = BAM_PL_NAMES[i].toUpperCase();
+ this.BAM_PL_NAMES = BAM_PL_NAMES;
+ }
+
+ /**
+ * Returns a representative PL string for this platform
+ * @return
+ */
+ public final String getDefaultPlatform() {
+ return BAM_PL_NAMES[0];
+ }
+
+ /**
+ * Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord.
+ * Note you should not use this function if you have a GATKSAMRecord -- use the
+ * accessor method instead.
+ *
+ * @param read
+ * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
+ */
+ public static final NGSPlatform fromRead(SAMRecord read) {
+ return fromReadGroup(read.getReadGroup());
+ }
+
+ /**
+ * Returns the NGSPlatform corresponding to the PL tag in the read group
+ * @param rg
+ * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
+ */
+ public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) {
+ return fromReadGroupPL(rg.getPlatform());
+ }
+
+ /**
+ * Returns the NGSPlatform corresponding to the PL tag in the read group
+ * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object
+ * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
+ */
+ public static final NGSPlatform fromReadGroupPL(final String plFromRG) {
+ if ( plFromRG == null ) return UNKNOWN;
+
+ // todo -- algorithm could be implemented more efficiently, as the list of all
+ // todo -- names is known upfront, so a decision tree could be used to identify
+ // todo -- a prefix common to PL
+ final String pl = plFromRG.toUpperCase();
+ for ( final NGSPlatform ngsPlatform : NGSPlatform.values() ) {
+ for ( final String bamPLName : ngsPlatform.BAM_PL_NAMES ) {
+ if ( pl.contains(bamPLName) )
+ return ngsPlatform;
+ }
+ }
+
+ return UNKNOWN;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java
index 58f7942fe..9180447b9 100644
--- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java
+++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java
@@ -25,35 +25,35 @@
package org.broadinstitute.sting.utils.R;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.ArgumentCollection;
-import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
-import org.broadinstitute.sting.utils.PathUtils;
import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.io.IOUtils;
+import org.broadinstitute.sting.utils.io.Resource;
+import org.broadinstitute.sting.utils.runtime.ProcessController;
+import org.broadinstitute.sting.utils.runtime.ProcessSettings;
import java.io.File;
-import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
- * Generic service for executing RScripts in the GATK directory
- *
- * @author Your Name
- * @since Date created
+ * Generic service for executing RScripts
*/
public class RScriptExecutor {
/**
* our log
*/
- protected static Logger logger = Logger.getLogger(RScriptExecutor.class);
+ private static Logger logger = Logger.getLogger(RScriptExecutor.class);
public static class RScriptArgumentCollection {
@Advanced
- @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
+ @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. Defaults Rscript meaning to use the first available on the environment PATH. For Broad users should 'use R-2.12' or later.", required = false)
public String PATH_TO_RSCRIPT = "Rscript";
@Advanced
@@ -62,40 +62,119 @@ public class RScriptExecutor {
public RScriptArgumentCollection() {}
- /** For testing and convenience */
+ /* For testing and convenience */
public RScriptArgumentCollection(final String PATH_TO_RSCRIPT, final List PATH_TO_RESOURCES) {
this.PATH_TO_RSCRIPT = PATH_TO_RSCRIPT;
this.PATH_TO_RESOURCES = PATH_TO_RESOURCES;
}
}
- final RScriptArgumentCollection myArgs;
- final boolean exceptOnError;
+ private final RScriptArgumentCollection myArgs;
+ private final boolean exceptOnError;
+ private final List libraries = new ArrayList();
+ private final List scriptResources = new ArrayList();
+ private final List scriptFiles = new ArrayList();
+ private final List args = new ArrayList();
public RScriptExecutor(final RScriptArgumentCollection myArgs, final boolean exceptOnError) {
this.myArgs = myArgs;
this.exceptOnError = exceptOnError;
}
- public void callRScripts(String scriptName, Object... scriptArgs) {
- callRScripts(scriptName, Arrays.asList(scriptArgs));
+ public void addLibrary(RScriptLibrary library) {
+ this.libraries.add(library);
}
- public void callRScripts(String scriptName, List