diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..8623fa076 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +/*.bam +/*.bai +/*.bed +*.idx +*~ +/*.vcf +/*.txt +/*.csh +/.* +/*.pdf +/*.eval +*.ipr +*.iws +*.iml +.DS_Store +queueScatterGather +/foo* +/bar* +integrationtests/ +public/testdata/onTheFlyOutputTest.vcf diff --git a/build.xml b/build.xml index 446982a44..6ca959c38 100644 --- a/build.xml +++ b/build.xml @@ -28,6 +28,8 @@ + + @@ -35,18 +37,25 @@ + + + + + + - + - + @@ -60,7 +69,7 @@ - + @@ -82,7 +91,7 @@ - + @@ -113,7 +122,7 @@ - + @@ -154,7 +163,7 @@ - + @@ -211,11 +220,11 @@ - + - + @@ -224,11 +233,11 @@ - + - + @@ -266,7 +275,7 @@ - + @@ -312,13 +321,13 @@ - + - + @@ -327,11 +336,11 @@ - + - @@ -341,9 +350,9 @@ - + - + @@ -362,14 +371,14 @@ - + - + - - + @@ -413,9 +422,9 @@ - + - + @@ -424,12 +433,12 @@ - + - + @@ -532,6 +541,11 @@ + + + + + @@ -539,7 +553,7 @@ - + @@ -551,6 +565,12 @@ + + + + + + @@ -579,6 +599,10 @@ + + + + @@ -593,6 +617,10 @@ + + + + @@ -605,28 +633,7 @@ - @@ -643,6 +650,9 @@ + + + @@ -682,20 +692,7 @@ - + @@ -780,10 +777,6 @@ - @@ -800,10 +793,6 @@ - @@ -851,6 +840,8 @@ + + @@ -1187,19 +1178,18 @@ - - + - + - + diff --git a/public/R/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R similarity index 100% rename from public/R/queueJobReport.R rename to public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION similarity index 100% rename from public/R/src/gsalib/DESCRIPTION rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.error.R similarity index 100% rename from public/R/src/gsalib/R/gsa.error.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.error.R diff --git a/public/R/src/gsalib/R/gsa.getargs.R 
b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.getargs.R similarity index 100% rename from public/R/src/gsalib/R/gsa.getargs.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.getargs.R diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.message.R similarity index 100% rename from public/R/src/gsalib/R/gsa.message.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.message.R diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.plot.venn.R similarity index 100% rename from public/R/src/gsalib/R/gsa.plot.venn.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.plot.venn.R diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.eval.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.eval.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.eval.R diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.gatkreport.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.squidmetrics.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.squidmetrics.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.squidmetrics.R diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.vcf.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.vcf.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.vcf.R diff --git a/public/R/src/gsalib/R/gsa.warn.R 
b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.warn.R similarity index 100% rename from public/R/src/gsalib/R/gsa.warn.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.warn.R diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/Read-and-delete-me similarity index 100% rename from public/R/src/gsalib/Read-and-delete-me rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/Read-and-delete-me diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg similarity index 100% rename from public/R/src/gsalib/data/tearsheetdrop.jpg rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg diff --git a/public/R/src/gsalib/man/gsa.error.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.error.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.error.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.error.Rd diff --git a/public/R/src/gsalib/man/gsa.getargs.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.getargs.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.getargs.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.getargs.Rd diff --git a/public/R/src/gsalib/man/gsa.message.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.message.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.message.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.message.Rd diff --git a/public/R/src/gsalib/man/gsa.plot.venn.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.plot.venn.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.plot.venn.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.plot.venn.Rd diff --git a/public/R/src/gsalib/man/gsa.read.eval.Rd 
b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.eval.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.eval.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.eval.Rd diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.gatkreport.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.gatkreport.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.gatkreport.Rd diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.squidmetrics.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.squidmetrics.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.squidmetrics.Rd diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.vcf.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.vcf.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.vcf.Rd diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.warn.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.warn.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.warn.Rd diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd similarity index 100% rename from public/R/src/gsalib/man/gsalib-package.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index 351583c07..c0823e5c5 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ 
b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable { /** * Maps indices of command line arguments to values paired with that argument. */ - public final SortedMap> indices = new TreeMap>(); + public final SortedMap> sites = new TreeMap>(); /** * An ordered, freeform collection of tags. @@ -72,32 +72,32 @@ public class ArgumentMatch implements Iterable { } /** - * A simple way of indicating that an argument with the given label and definition exists at this index. + * A simple way of indicating that an argument with the given label and definition exists at this site. * @param label Label of the argument match. Must not be null. * @param definition The associated definition, if one exists. May be null. - * @param index Position of the argument. Must not be null. + * @param site Position of the argument. Must not be null. * @param tags ordered freeform text tags associated with this argument. */ - public ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final Tags tags) { - this( label, definition, index, null, tags ); + public ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final Tags tags) { + this( label, definition, site, null, tags ); } /** - * A simple way of indicating that an argument with the given label and definition exists at this index. + * A simple way of indicating that an argument with the given label and definition exists at this site. * @param label Label of the argument match. Must not be null. * @param definition The associated definition, if one exists. May be null. - * @param index Position of the argument. Must not be null. + * @param site Position of the argument. Must not be null. * @param value Value for the argument at this position. * @param tags ordered freeform text tags associated with this argument. 
*/ - private ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final String value, final Tags tags) { + private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) { this.label = label; this.definition = definition; ArrayList values = new ArrayList(); if( value != null ) values.add(value); - indices.put(index,values ); + sites.put(site,values ); this.tags = tags; } @@ -117,7 +117,7 @@ public class ArgumentMatch implements Iterable { ArgumentMatch otherArgumentMatch = (ArgumentMatch)other; return this.definition.equals(otherArgumentMatch.definition) && this.label.equals(otherArgumentMatch.label) && - this.indices.equals(otherArgumentMatch.indices) && + this.sites.equals(otherArgumentMatch.sites) && this.tags.equals(otherArgumentMatch.tags); } @@ -129,16 +129,17 @@ public class ArgumentMatch implements Iterable { * @param key Key which specifies the transform. * @return A variant of this ArgumentMatch with all keys transformed. */ + @SuppressWarnings("unchecked") ArgumentMatch transform(Multiplexer multiplexer, Object key) { - SortedMap> newIndices = new TreeMap>(); - for(Map.Entry> index: indices.entrySet()) { + SortedMap> newIndices = new TreeMap>(); + for(Map.Entry> site: sites.entrySet()) { List newEntries = new ArrayList(); - for(String entry: index.getValue()) + for(String entry: site.getValue()) newEntries.add(multiplexer.transformArgument(key,entry)); - newIndices.put(index.getKey(),newEntries); + newIndices.put(site.getKey(),newEntries); } ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition); - newArgumentMatch.indices.putAll(newIndices); + newArgumentMatch.sites.putAll(newIndices); return newArgumentMatch; } @@ -157,9 +158,9 @@ public class ArgumentMatch implements Iterable { public Iterator iterator() { return new Iterator() { /** - * Iterate over each the available index. + * Iterate over each the available site. 
*/ - private Iterator indexIterator = null; + private Iterator siteIterator = null; /** * Iterate over each available token. @@ -167,9 +168,9 @@ public class ArgumentMatch implements Iterable { private Iterator tokenIterator = null; /** - * The next index to return. Null if none remain. + * The next site to return. Null if none remain. */ - Integer nextIndex = null; + ArgumentMatchSite nextSite = null; /** * The next token to return. Null if none remain. @@ -177,7 +178,7 @@ public class ArgumentMatch implements Iterable { String nextToken = null; { - indexIterator = indices.keySet().iterator(); + siteIterator = sites.keySet().iterator(); prepareNext(); } @@ -186,7 +187,7 @@ public class ArgumentMatch implements Iterable { * @return True if there's another token waiting in the wings. False otherwise. */ public boolean hasNext() { - return nextToken != null; + return nextToken != null; } /** @@ -194,32 +195,32 @@ public class ArgumentMatch implements Iterable { * @return The next ArgumentMatch in the series. Should never be null. */ public ArgumentMatch next() { - if( nextIndex == null || nextToken == null ) + if( nextSite == null || nextToken == null ) throw new IllegalStateException( "No more ArgumentMatches are available" ); - ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags ); + ArgumentMatch match = new ArgumentMatch( label, definition, nextSite, nextToken, tags ); prepareNext(); return match; } /** * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available, - * initialize nextIndex / nextToken to null. + * initialize nextSite / nextToken to null. */ private void prepareNext() { if( tokenIterator != null && tokenIterator.hasNext() ) { nextToken = tokenIterator.next(); } else { - nextIndex = null; + nextSite = null; nextToken = null; // Do a nested loop. While more data is present in the inner loop, grab that data. // Otherwise, troll the outer iterator looking for more data. 
- while( indexIterator.hasNext() ) { - nextIndex = indexIterator.next(); - if( indices.get(nextIndex) != null ) { - tokenIterator = indices.get(nextIndex).iterator(); + while( siteIterator.hasNext() ) { + nextSite = siteIterator.next(); + if( sites.get(nextSite) != null ) { + tokenIterator = sites.get(nextSite).iterator(); if( tokenIterator.hasNext() ) { nextToken = tokenIterator.next(); break; @@ -245,29 +246,29 @@ public class ArgumentMatch implements Iterable { * @param other The other match to merge into. */ public void mergeInto( ArgumentMatch other ) { - indices.putAll(other.indices); + sites.putAll(other.sites); } /** * Associate a value with this merge maapping. - * @param index index of the command-line argument to which this value is mated. + * @param site site of the command-line argument to which this value is mated. * @param value Text representation of value to add. */ - public void addValue( int index, String value ) { - if( !indices.containsKey(index) || indices.get(index) == null ) - indices.put(index, new ArrayList() ); - indices.get(index).add(value); + public void addValue( ArgumentMatchSite site, String value ) { + if( !sites.containsKey(site) || sites.get(site) == null ) + sites.put(site, new ArrayList() ); + sites.get(site).add(value); } /** * Does this argument already have a value at the given site? * Arguments are only allowed to be single-valued per site, and * flags aren't allowed a value at all. - * @param index Index at which to check for values. + * @param site Site at which to check for values. * @return True if the argument has a value at the given site. False otherwise. 
*/ - public boolean hasValueAtSite( int index ) { - return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag(); + public boolean hasValueAtSite( ArgumentMatchSite site ) { + return (sites.get(site) != null && sites.get(site).size() >= 1) || isArgumentFlag(); } /** @@ -276,9 +277,9 @@ public class ArgumentMatch implements Iterable { */ public List values() { List values = new ArrayList(); - for( int index: indices.keySet() ) { - if( indices.get(index) != null ) - values.addAll(indices.get(index)); + for( ArgumentMatchSite site: sites.keySet() ) { + if( sites.get(site) != null ) + values.addAll(sites.get(site)); } return values; } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java new file mode 100644 index 000000000..8a4120101 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +/** + * Which source and the index within the source where an argument match was found. + */ +public class ArgumentMatchSite implements Comparable { + private final ArgumentMatchSource source; + private final int index; + + public ArgumentMatchSite(ArgumentMatchSource source, int index) { + this.source = source; + this.index = index; + } + + public ArgumentMatchSource getSource() { + return source; + } + + public int getIndex() { + return index; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ArgumentMatchSite that = (ArgumentMatchSite) o; + + return (index == that.index) && (source == null ? that.source == null : source.equals(that.source)); + } + + @Override + public int hashCode() { + int result = source != null ? source.hashCode() : 0; + // Generated by intellij. No other special reason to this implementation. -ks + result = 31 * result + index; + return result; + } + + @Override + public int compareTo(ArgumentMatchSite that) { + int comp = this.source.compareTo(that.source); + if (comp != 0) + return comp; + + // Both files are the same. + if (this.index == that.index) + return 0; + return this.index < that.index ? 
-1 : 1; + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java new file mode 100644 index 000000000..ed2700006 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Where an argument match originated, via the commandline or a file. + */ +public class ArgumentMatchSource implements Comparable { + public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null); + + private final ArgumentMatchSourceType type; + private final File file; + + /** + * Creates an argument match source from the specified file. 
+ * @param file File specifying the arguments. Must not be null. + */ + public ArgumentMatchSource(File file) { + this(ArgumentMatchSourceType.File, file); + } + + private ArgumentMatchSource(ArgumentMatchSourceType type, File file) { + if (type == ArgumentMatchSourceType.File && file == null) + throw new IllegalArgumentException("An argument match source of type File cannot have a null file."); + this.type = type; + this.file = file; + } + + public ArgumentMatchSourceType getType() { + return type; + } + + public File getFile() { + return file; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ArgumentMatchSource that = (ArgumentMatchSource) o; + + return (type == that.type) && (file == null ? that.file == null : file.equals(that.file)); + } + + @Override + public int hashCode() { + int result = type != null ? type.hashCode() : 0; + result = 31 * result + (file != null ? file.hashCode() : 0); + return result; + } + + /** + * Compares two sources, putting the command line first, then files. + */ + @Override + public int compareTo(ArgumentMatchSource that) { + int comp = this.type.compareTo(that.type); + if (comp != 0) + return comp; + + File f1 = this.file; + File f2 = that.file; + + if ((f1 == null) ^ (f2 == null)) { + // If one of the files is null and the other is not + // put the null file first + return f1 == null ? -1 : 1; + } + + return f1 == null ? 
0 : f1.compareTo(f2); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java new file mode 100644 index 000000000..3ff6e21d4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +/** + * Type of where an argument match originated, via the commandline or a file. 
+ */ +public enum ArgumentMatchSourceType { + CommandLine, File +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java index 52d3b8232..3da28c420 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java @@ -37,7 +37,7 @@ public class ArgumentMatches implements Iterable { * Collection matches from argument definition to argument value. * Package protected access is deliberate. */ - Map argumentMatches = new TreeMap(); + Map argumentMatches = new TreeMap(); /** * Provide a place to put command-line argument values that don't seem to belong to @@ -80,7 +80,7 @@ public class ArgumentMatches implements Iterable { * @param site Site at which to check. * @return True if the site has a match. False otherwise. */ - boolean hasMatch( int site ) { + boolean hasMatch( ArgumentMatchSite site ) { return argumentMatches.containsKey( site ); } @@ -90,7 +90,7 @@ public class ArgumentMatches implements Iterable { * @return The match present at the given site. * @throws IllegalArgumentException if site does not contain a match. */ - ArgumentMatch getMatch( int site ) { + ArgumentMatch getMatch( ArgumentMatchSite site ) { if( !argumentMatches.containsKey(site) ) throw new IllegalArgumentException( "Site does not contain an argument: " + site ); return argumentMatches.get(site); @@ -107,6 +107,7 @@ public class ArgumentMatches implements Iterable { /** * Return all argument matches of this source. + * @param parsingEngine Parsing engine. * @param argumentSource Argument source to match. * @return List of all matches. */ @@ -167,6 +168,7 @@ public class ArgumentMatches implements Iterable { * TODO: Generify this. * @param multiplexer Multiplexer that controls the transformation process. * @param key Key which specifies the transform. + * @return new argument matches. 
*/ ArgumentMatches transform(Multiplexer multiplexer, Object key) { ArgumentMatches newArgumentMatches = new ArgumentMatches(); @@ -187,15 +189,15 @@ public class ArgumentMatches implements Iterable { for( ArgumentMatch argumentMatch: getUniqueMatches() ) { if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) { argumentMatch.mergeInto( match ); - for( int index: match.indices.keySet() ) - argumentMatches.put( index, argumentMatch ); + for( ArgumentMatchSite site: match.sites.keySet() ) + argumentMatches.put( site, argumentMatch ); definitionExists = true; } } if( !definitionExists ) { - for( int index: match.indices.keySet() ) - argumentMatches.put( index, match ); + for( ArgumentMatchSite site: match.sites.keySet() ) + argumentMatches.put( site, match ); } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index d88e7030e..bed1e710e 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -35,10 +35,7 @@ import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.EnumSet; -import java.util.Locale; +import java.util.*; public abstract class CommandLineProgram { @@ -155,6 +152,7 @@ public abstract class CommandLineProgram { * * @param clp the command line program to execute * @param args the command line arguments passed in + * @param dryRun dry run * @throws Exception when an exception occurs */ @SuppressWarnings("unchecked") @@ -176,6 +174,8 @@ public abstract class CommandLineProgram { ParsingEngine parser = clp.parser = new ParsingEngine(clp); parser.addArgumentSource(clp.getClass()); + Map> parsedArgs; + // process the args 
if (clp.canAddArgumentsDynamically()) { // if the command-line program can toss in extra args, fetch them and reparse the arguments. @@ -196,14 +196,14 @@ public abstract class CommandLineProgram { Class[] argumentSources = clp.getArgumentSources(); for (Class argumentSource : argumentSources) parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); - parser.parse(args); + parsedArgs = parser.parse(args); if (isHelpPresent(parser)) printHelpAndExit(clp, parser); if ( ! dryRun ) parser.validate(); } else { - parser.parse(args); + parsedArgs = parser.parse(args); if ( ! dryRun ) { if (isHelpPresent(parser)) @@ -230,7 +230,7 @@ public abstract class CommandLineProgram { } // regardless of what happens next, generate the header information - HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); + HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), parsedArgs); // call the execute CommandLineProgram.result = clp.execute(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index ad58553c1..0fac195e1 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.commandline; import com.google.java.contract.Requires; +import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; +import java.io.File; +import java.io.IOException; import java.lang.reflect.Field; import java.util.*; @@ -101,6 +104,8 @@ public class ParsingEngine { if(clp != 
null) argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors()); argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS); + + addArgumentSource(ParsingEngineArgumentFiles.class); } /** @@ -149,21 +154,43 @@ public class ParsingEngine { * command-line arguments to the arguments that are actually * required. * @param tokens Tokens passed on the command line. + * @return The parsed arguments by file. */ - public void parse( String[] tokens ) { + public SortedMap> parse( String[] tokens ) { argumentMatches = new ArgumentMatches(); + SortedMap> parsedArgs = new TreeMap>(); - int lastArgumentMatchSite = -1; + List cmdLineTokens = Arrays.asList(tokens); + parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs); - for( int i = 0; i < tokens.length; i++ ) { - String token = tokens[i]; + ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles(); + + // Load the arguments ONLY into the argument files. + // Validation may optionally run on the rest of the arguments. + loadArgumentsIntoObject(argumentFiles); + + for (File file: argumentFiles.files) { + List fileTokens = getArguments(file); + parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs); + } + + return parsedArgs; + } + + private void parse(ArgumentMatchSource matchSource, List tokens, + ArgumentMatches argumentMatches, SortedMap> parsedArgs) { + ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1); + + int i = 0; + for (String token: tokens) { // If the token is of argument form, parse it into its own argument match. // Otherwise, pair it with the most recently used argument discovered. 
+ ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i); if( isArgumentForm(token) ) { - ArgumentMatch argumentMatch = parseArgument( token, i ); + ArgumentMatch argumentMatch = parseArgument( token, site ); if( argumentMatch != null ) { argumentMatches.mergeInto( argumentMatch ); - lastArgumentMatchSite = i; + lastArgumentMatchSite = site; } } else { @@ -171,10 +198,31 @@ public class ParsingEngine { !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token ); else - argumentMatches.MissingArgument.addValue( i, token ); + argumentMatches.MissingArgument.addValue( site, token ); } + i++; } + + parsedArgs.put(matchSource, tokens); + } + + private List getArguments(File file) { + try { + if (file.getAbsolutePath().endsWith(".list")) { + return getListArguments(file); + } + } catch (IOException e) { + throw new UserException.CouldNotReadInputFile(file, e); + } + throw new UserException.CouldNotReadInputFile(file, "file extension is not .list"); + } + + private List getListArguments(File file) throws IOException { + ArrayList argsList = new ArrayList(); + for (String line: FileUtils.readLines(file)) + argsList.addAll(Arrays.asList(Utils.escapeExpressions(line))); + return argsList; } public enum ValidationType { MissingRequiredArgument, @@ -495,7 +543,7 @@ public class ParsingEngine { * @param position The position of the token in question. * @return ArgumentMatch associated with this token, or null if no match exists. 
*/ - private ArgumentMatch parseArgument( String token, int position ) { + private ArgumentMatch parseArgument( String token, ArgumentMatchSite position ) { if( !isArgumentForm(token) ) throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token ); @@ -580,9 +628,21 @@ class UnmatchedArgumentException extends ArgumentException { private static String formatArguments( ArgumentMatch invalidValues ) { StringBuilder sb = new StringBuilder(); - for( int index: invalidValues.indices.keySet() ) - for( String value: invalidValues.indices.get(index) ) { - sb.append( String.format("%nInvalid argument value '%s' at position %d.", value, index) ); + for( ArgumentMatchSite site: invalidValues.sites.keySet() ) + for( String value: invalidValues.sites.get(site) ) { + switch (site.getSource().getType()) { + case CommandLine: + sb.append( String.format("%nInvalid argument value '%s' at position %d.", + value, site.getIndex()) ); + break; + case File: + sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.", + value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) ); + break; + default: + throw new RuntimeException( String.format("Unexpected argument match source type: %s", + site.getSource().getType())); + } if(value != null && Utils.dupString(' ',value.length()).equals(value)) sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace."); } @@ -635,4 +695,13 @@ class UnknownEnumeratedValueException extends ArgumentException { private static String formatArguments(ArgumentDefinition definition, String argumentPassed) { return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions)); } -} \ No newline at end of file +} + +/** + * Container class to store the list of argument files. + * The files will be parsed after the command line arguments. 
+ */ +class ParsingEngineArgumentFiles { + @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) + public List files = new ArrayList(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index a070cb5a1..452309e89 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -68,7 +68,7 @@ public abstract class ParsingMethod { * @return An argument match. Definition field will be populated if a match was found or * empty if no appropriate definition could be found. */ - public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) { + public ArgumentMatch match( ArgumentDefinitions definitions, String token, ArgumentMatchSite position ) { // If the argument is valid, parse out the argument. Matcher matcher = pattern.matcher(token); @@ -102,9 +102,7 @@ public abstract class ParsingMethod { // Try to find a matching argument. If found, label that as the match. If not found, add the argument // with a null definition. 
- ArgumentMatch argumentMatch = new ArgumentMatch(argument,argumentDefinition,position,tags); - - return argumentMatch; + return new ArgumentMatch(argument,argumentDefinition,position,tags); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 2b6c280c8..20efc3173 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -221,12 +221,12 @@ public class GenomeAnalysisEngine { ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals); // execute the microscheduler, storing the results - Object result = microScheduler.execute(this.walker, shardStrategy); + return microScheduler.execute(this.walker, shardStrategy); //monitor.stop(); //logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed())); - return result; + //return result; } /** @@ -301,6 +301,10 @@ public class GenomeAnalysisEngine { return method; } + protected void setDownsamplingMethod(DownsamplingMethod method) { + argCollection.setDownsamplingMethod(method); + } + public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); } public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); } @@ -390,7 +394,9 @@ public class GenomeAnalysisEngine { /** * Get the sharding strategy given a driving data source. * + * @param readsDataSource readsDataSource * @param drivingDataSource Data on which to shard. 
+ * @param intervals intervals * @return the sharding strategy */ protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) { @@ -427,7 +433,7 @@ public class GenomeAnalysisEngine { return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region); } - ShardStrategy shardStrategy = null; + ShardStrategy shardStrategy; ShardStrategyFactory.SHATTER_STRATEGY shardType; long SHARD_SIZE = 100000L; @@ -436,6 +442,8 @@ public class GenomeAnalysisEngine { if (walker instanceof RodWalker) SHARD_SIZE *= 1000; if (intervals != null && !intervals.isEmpty()) { + if (readsDataSource == null) + throw new IllegalArgumentException("readsDataSource is null"); if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately."); @@ -499,7 +507,8 @@ public class GenomeAnalysisEngine { */ private void initializeTempDirectory() { File tempDir = new File(System.getProperty("java.io.tmpdir")); - tempDir.mkdirs(); + if (!tempDir.exists() && !tempDir.mkdirs()) + throw new UserException.BadTmpDir("Unable to create directory"); } /** @@ -707,6 +716,7 @@ public class GenomeAnalysisEngine { * @param reads Reads data source. * @param reference Reference data source. * @param rods a collection of the reference ordered data tracks + * @param manager manager */ private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection rods, RMDTrackBuilder manager) { if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null ) @@ -735,15 +745,22 @@ public class GenomeAnalysisEngine { /** * Gets a data source for the given set of reads. 
* + * @param argCollection arguments + * @param genomeLocParser parser + * @param refReader reader * @return A data source for the given set of reads. */ private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) { DownsamplingMethod method = getDownsamplingMethod(); + // Synchronize the method back into the collection so that it shows up when + // interrogating for the downsample method during command line recreation. + setDownsamplingMethod(method); + if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); - SAMDataSource dataSource = new SAMDataSource( + return new SAMDataSource( samReaderIDs, genomeLocParser, argCollection.useOriginalBaseQualities, @@ -759,14 +776,12 @@ public class GenomeAnalysisEngine { refReader, argCollection.defaultBaseQualities, !argCollection.disableLowMemorySharding); - return dataSource; } /** * Opens a reference sequence file paired with an index. Only public for testing purposes * * @param refFile Handle to a reference sequence file. Non-null. - * @return A thread-safe file wrapper. */ public void setReferenceDataSource(File refFile) { this.referenceDataSource = new ReferenceDataSource(refFile); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 70819a092..4d7e4e244 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -138,8 +138,8 @@ public class GATKArgumentCollection { /** * Gets the downsampling method explicitly specified by the user. 
If the user didn't specify - * a default downsampling mechanism, return null. - * @return The explicitly specified downsampling mechanism, or null if none exists. + * a default downsampling mechanism, return the default. + * @return The explicitly specified downsampling mechanism, or the default if none exists. */ public DownsamplingMethod getDownsamplingMethod() { if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null) @@ -149,6 +149,18 @@ public class GATKArgumentCollection { return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction); } + /** + * Set the downsampling method stored in the argument collection so that it is read back out when interrogating the command line arguments. + * @param method The downsampling mechanism. + */ + public void setDownsamplingMethod(DownsamplingMethod method) { + if (method == null) + throw new IllegalArgumentException("method is null"); + downsamplingType = method.type; + downsampleCoverage = method.toCoverage; + downsampleFraction = method.toFraction; + } + // -------------------------------------------------------------------------------------------------------------- // // BAQ arguments diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 74d39ecb0..8452aadfd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQSamIterator; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import java.io.File; import java.lang.reflect.InvocationTargetException; @@ 
-57,6 +58,8 @@ import java.util.*; * Converts shards to SAM iterators over the specified region */ public class SAMDataSource { + final private static GATKSamRecordFactory factory = new GATKSamRecordFactory(); + /** Backing support for reads. */ protected final ReadProperties readProperties; @@ -644,7 +647,9 @@ public class SAMDataSource { BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, byte defaultBaseQualities) { - wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); + if ( useOriginalBaseQualities || defaultBaseQualities >= 0 ) + // only wrap if we are replacing the original qualitiies or using a default base quality + wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away @@ -756,6 +761,7 @@ public class SAMDataSource { public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { for(SAMReaderID readerID: readerIDs) { SAMFileReader reader = new SAMFileReader(readerID.samFile); + reader.setSAMRecordFactory(factory); reader.enableFileSource(true); reader.enableIndexMemoryMapping(false); if(!enableLowMemorySharding) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 2f30d12a8..9a89d2086 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * An iterator which does post-processing of a read, 
including potentially wrapping @@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator { * no next exists. */ public SAMRecord next() { - return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities); + SAMRecord rec = wrappedIterator.next(); + + // if we are using default quals, check if we need them, and add if necessary. + // 1. we need if reads are lacking or have incomplete quality scores + // 2. we add if defaultBaseQualities has a positive value + if (defaultBaseQualities >= 0) { + byte reads [] = rec.getReadBases(); + byte quals [] = rec.getBaseQualities(); + if (quals == null || quals.length < reads.length) { + byte new_quals [] = new byte [reads.length]; + for (int i=0; i GLs, List Alleles, + protected abstract void getLog10PNonRef(Map GLs, List Alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index f87eae781..1c2d82ab7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -51,9 +51,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { super(UAC, N, logger, verboseWriter); } - public void getLog10PNonRef(RefMetaDataTracker tracker, - ReferenceContext ref, - Map GLs, List alleles, + public void getLog10PNonRef(Map GLs, List alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java index f4195e5f0..27842a8bf 100755 
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -52,9 +52,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { AFMatrix = new AlleleFrequencyMatrix(N); } - protected void getLog10PNonRef(RefMetaDataTracker tracker, - ReferenceContext ref, - Map GLs, List alleles, + protected void getLog10PNonRef(Map GLs, List alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { initializeAFMatrix(GLs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 58b33924b..aea63b61d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -30,12 +30,10 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Haplotype; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java index 3652763de..4f378b24a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -15,11 +16,11 @@ import java.util.ArrayList; public class MultiallelicGenotypeLikelihoods { private String sample; private double[] GLs; - private ArrayList alleleList; + private List alleleList; private int depth; public MultiallelicGenotypeLikelihoods(String sample, - ArrayList A, + List A, double[] log10Likelihoods, int depth) { /* Check for consistency between likelihood vector and number of alleles */ int numAlleles = A.size(); @@ -40,7 +41,7 @@ public class MultiallelicGenotypeLikelihoods { return GLs; } - public ArrayList getAlleles() { + public List getAlleles() { return alleleList; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index b72b68f9f..9f0585d13 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -325,7 +325,7 @@ public class UnifiedGenotyperEngine { // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), 
vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); @@ -383,7 +383,7 @@ public class UnifiedGenotyperEngine { // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); @@ -391,7 +391,7 @@ public class UnifiedGenotyperEngine { // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double forwardLog10PofNull = 
log10AlleleFrequencyPosteriors.get()[0]; double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); @@ -400,7 +400,7 @@ public class UnifiedGenotyperEngine { // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); @@ -447,6 +447,78 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } + // A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs + public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) { + + // initialize the data for this thread if that hasn't been done yet + if ( afcm.get() == null ) { + log10AlleleFrequencyPosteriors.set(new double[N+1]); + afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); + } + + // estimate our confidence in a reference call and return + if ( vc.getNSamples() == 0 ) + return null; + + // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) + clearAFarray(log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), 
vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + + // find the most likely frequency + int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); + + // calculate p(f>0) + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()); + double sum = 0.0; + for (int i = 1; i <= N; i++) + sum += normalizedPosteriors[i]; + double PofF = Math.min(sum, 1.0); // deal with precision errors + + double phredScaledConfidence; + if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); + if ( Double.isInfinite(phredScaledConfidence) ) + phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; + } else { + phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); + if ( Double.isInfinite(phredScaledConfidence) ) { + sum = 0.0; + for (int i = 1; i <= N; i++) { + if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + break; + sum += log10AlleleFrequencyPosteriors.get()[i]; + } + phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 
0 : -10.0 * sum); + } + } + + // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero + if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { + // technically, at this point our confidence in a reference call isn't accurately estimated + // because it didn't take into account samples with no data, so let's get a better estimate + return null; + } + + // create the genotypes + Map genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); + + // *** note that calculating strand bias involves overwriting data structures, so we do that last + HashMap attributes = new HashMap(); + + int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); + + Set myAlleles = new HashSet(vc.getAlleles()); + // strip out the alternate allele if it's a ref call + if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { + myAlleles = new HashSet(1); + myAlleles.add(vc.getReference()); + } + VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc, + myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? 
null : filter, attributes, vc.getReferenceBaseForIndel()); + + return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); + } + private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // TODO - temp fix until we can deal with extended events properly // for indels, stop location is one more than ref allele length diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 9edf5b5d4..0df7b7cbd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -32,11 +32,17 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; +import java.io.File; +import java.io.FileWriter; +import java.io.PrintStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; @@ -45,9 +51,6 @@ import java.util.LinkedHashMap; public class PairHMMIndelErrorModel { public static final int BASE_QUAL_THRESHOLD = 20; - private final double logGapOpenProbability; - private final double logGapContinuationProbability; - private boolean DEBUG = false; private boolean bandedLikelihoods = false; @@ -89,8 +92,6 @@ public class PairHMMIndelErrorModel { } public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean 
deb, boolean bandedLikelihoods) { - this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob - this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob this.DEBUG = deb; this.bandedLikelihoods = bandedLikelihoods; @@ -98,13 +99,14 @@ public class PairHMMIndelErrorModel { this.GAP_CONT_PROB_TABLE = new double[MAX_HRUN_GAP_IDX]; this.GAP_OPEN_PROB_TABLE = new double[MAX_HRUN_GAP_IDX]; + double gop = -indelGOP/10.0; + double gcp = -indelGCP/10.0; + for (int i = 0; i < START_HRUN_GAP_IDX; i++) { - GAP_OPEN_PROB_TABLE[i] = logGapOpenProbability; - GAP_CONT_PROB_TABLE[i] = logGapContinuationProbability; + GAP_OPEN_PROB_TABLE[i] = gop; + GAP_CONT_PROB_TABLE[i] = gcp; } - double gop = logGapOpenProbability; - double gcp = logGapContinuationProbability; double step = GAP_PENALTY_HRUN_STEP/10.0; double maxGOP = -MIN_GAP_OPEN_PENALTY/10.0; // phred to log prob @@ -185,60 +187,57 @@ public class PairHMMIndelErrorModel { } private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals, - double[] currentGOP, double[] currentGCP, int eventLength) { + double[] currentGOP, double[] currentGCP, int indToStart, + double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) { final int X_METRIC_LENGTH = readBases.length+1; final int Y_METRIC_LENGTH = haplotypeBases.length+1; - // initialize path metric and traceback memories for likelihood computation - final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; - final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; - final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + if (indToStart == 0) { + // default initialization for all arrays - - final double DIAG_TOL = 20; // means that max - min element in diags have to be > this number for banding to take effect. 
- // default initialization for all arrays - for (int i=0; i < X_METRIC_LENGTH; i++) { - Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY); - Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY); - Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY); - } - - for (int i=1; i < X_METRIC_LENGTH; i++) { - //initialize first column - XMetricArray[i][0] = END_GAP_COST*(i); - } - - for (int j=1; j < Y_METRIC_LENGTH; j++) { - // initialize first row - YMetricArray[0][j] = END_GAP_COST*(j); - } - matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY; - XMetricArray[0][0]= YMetricArray[0][0] = 0; - - - final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1; - final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH); - - int idxWithMaxElement = 0; - - double maxElementInDiag = Double.NEGATIVE_INFINITY; - - for (int diag=0; diag < numDiags; diag++) { - // compute default I and J start positions at edge of diagonals - int indI = 0; - int indJ = diag; - if (diag >= Y_METRIC_LENGTH ) { - indI = diag-(Y_METRIC_LENGTH-1); - indJ = Y_METRIC_LENGTH-1; + for (int i=0; i < X_METRIC_LENGTH; i++) { + Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY); + Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY); + Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY); } - // first pass: from max element to edge - int idxLow = bandedLikelihoods? 
idxWithMaxElement : 0; + for (int i=1; i < X_METRIC_LENGTH; i++) { + //initialize first column + XMetricArray[i][0] = END_GAP_COST*(i); + } - // reset diag max value before starting - if (bandedLikelihoods) { - maxElementInDiag = Double.NEGATIVE_INFINITY; + for (int j=1; j < Y_METRIC_LENGTH; j++) { + // initialize first row + YMetricArray[0][j] = END_GAP_COST*(j); + } + matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY; + XMetricArray[0][0]= YMetricArray[0][0] = 0; + } + + + if (bandedLikelihoods) { + final double DIAG_TOL = 20; // means that max - min element in diags have to be > this number for banding to take effect. + + final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1; + final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH); + + int idxWithMaxElement = 0; + + for (int diag=indToStart; diag < numDiags; diag++) { + // compute default I and J start positions at edge of diagonals + int indI = 0; + int indJ = diag; + if (diag >= Y_METRIC_LENGTH ) { + indI = diag-(Y_METRIC_LENGTH-1); + indJ = Y_METRIC_LENGTH-1; + } + + // first pass: from max element to edge + int idxLow = idxWithMaxElement; + + // reset diag max value before starting + double maxElementInDiag = Double.NEGATIVE_INFINITY; // set indI, indJ to correct values indI += idxLow; indJ -= idxLow; @@ -248,46 +247,10 @@ public class PairHMMIndelErrorModel { indJ++; } - } - - for (int el = idxLow; el < elemsInDiag; el++) { - updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases, - currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray); - // update max in diagonal - if (bandedLikelihoods) { - final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]); - - // check if we've fallen off diagonal value by threshold - if (bestMetric > maxElementInDiag) { - maxElementInDiag = bestMetric; - idxWithMaxElement = el; - } - else if (bestMetric < maxElementInDiag - 
DIAG_TOL) - break; // done w/current diagonal - } - - indI++; - if (indI >=X_METRIC_LENGTH ) - break; - indJ--; - if (indJ <= 0) - break; - } - if (bandedLikelihoods && idxLow > 0) { - // now do second part in opposite direction - indI = 0; - indJ = diag; - if (diag >= Y_METRIC_LENGTH ) { - indI = diag-(Y_METRIC_LENGTH-1); - indJ = Y_METRIC_LENGTH-1; - } - - indI += idxLow-1; - indJ -= idxLow-1; - for (int el = idxLow-1; el >= 0; el--) { + for (int el = idxLow; el < elemsInDiag; el++) { updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases, - currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray); + currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray); // update max in diagonal final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]); @@ -296,34 +259,81 @@ public class PairHMMIndelErrorModel { maxElementInDiag = bestMetric; idxWithMaxElement = el; } - else if (bestMetric < maxElementInDiag - DIAG_TOL) + else if (bestMetric < maxElementInDiag - DIAG_TOL && idxWithMaxElement > 0) break; // done w/current diagonal - indJ++; - if (indJ >= Y_METRIC_LENGTH ) + indI++; + if (indI >=X_METRIC_LENGTH ) break; - indI--; - if (indI <= 0) + indJ--; + if (indJ <= 0) break; } + if (idxLow > 0) { + // now do second part in opposite direction + indI = 0; + indJ = diag; + if (diag >= Y_METRIC_LENGTH ) { + indI = diag-(Y_METRIC_LENGTH-1); + indJ = Y_METRIC_LENGTH-1; + } + + indI += idxLow-1; + indJ -= idxLow-1; + for (int el = idxLow-1; el >= 0; el--) { + + updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases, + currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray); + // update max in diagonal + final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]); + + // check if we've fallen off diagonal value by threshold + if (bestMetric 
> maxElementInDiag) { + maxElementInDiag = bestMetric; + idxWithMaxElement = el; + } + else if (bestMetric < maxElementInDiag - DIAG_TOL) + break; // done w/current diagonal + + indJ++; + if (indJ >= Y_METRIC_LENGTH ) + break; + indI--; + if (indI <= 0) + break; + } + } + // if (DEBUG) + // System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement); } - // if (DEBUG) - // System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement); } + else { + // simplified rectangular version of update loop + for (int indI=1; indI < X_METRIC_LENGTH; indI++) { + for (int indJ=indToStart+1; indJ < Y_METRIC_LENGTH; indJ++) { + updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases, + currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray); + + } + } + } + + final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1; final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ]); - /* + + /* if (DEBUG) { PrintStream outx, outy, outm, outs; double[][] sumMetrics = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; try { - outx = new PrintStream("../../UGOptim/datax.txt"); - outy = new PrintStream("../../UGOptim/datay.txt"); - outm = new PrintStream("../../UGOptim/datam.txt"); - outs = new PrintStream("../../UGOptim/datas.txt"); + outx = new PrintStream("datax.txt"); + outy = new PrintStream("datay.txt"); + outm = new PrintStream("datam.txt"); + outs = new PrintStream("datas.txt"); double metrics[] = new double[3]; for (int indI=0; indI < X_METRIC_LENGTH; indI++) { for (int indJ=0; indJ < Y_METRIC_LENGTH; indJ++) { @@ -393,7 +403,7 @@ public class PairHMMIndelErrorModel { for (PileupElement p: pileup) { // > 1 when the read is a consensus read representing multiple independent observations - final boolean isReduced = ReadUtils.isReducedRead(p.getRead()); + final boolean isReduced = p.isReducedRead(); readCounts[readIdx] = isReduced 
? p.getReducedCount() : 1; // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) @@ -414,8 +424,6 @@ public class PairHMMIndelErrorModel { continue; } - double[] recalQuals = null; - // get bases of candidate haplotypes that overlap with reads final int trailingBases = 3; @@ -534,6 +542,12 @@ public class PairHMMIndelErrorModel { unclippedReadBases.length-numEndClippedBases); int j=0; + + // initialize path metric and traceback memories for likelihood computation + double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null; + byte[] previousHaplotypeSeen = null; + double[] previousGOP = null; + int startIdx; for (Allele a: haplotypeMap.keySet()) { @@ -551,11 +565,37 @@ public class PairHMMIndelErrorModel { byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(), (int)indStart, (int)indStop); + double readLikelihood; + if (matchMetricArray == null) { + final int X_METRIC_LENGTH = readBases.length+1; + final int Y_METRIC_LENGTH = haplotypeBases.length+1; + + matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + } final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop); final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop); - final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, - currentContextGOP, currentContextGCP, eventLength); + if (previousHaplotypeSeen == null) + startIdx = 0; + else { + int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); + int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP); + startIdx = Math.min(s1,s2); + } + previousHaplotypeSeen = haplotypeBases.clone(); + previousGOP = currentContextGOP.clone(); + + 
+ readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, + currentContextGOP, currentContextGCP, startIdx, matchMetricArray, XMetricArray, YMetricArray); + if (DEBUG) { + System.out.println("H:"+new String(haplotypeBases)); + System.out.println("R:"+new String(readBases)); + System.out.format("L:%4.2f\n",readLikelihood); + System.out.format("StPos:%d\n", startIdx); + } readEl.put(a,readLikelihood); readLikelihoods[readIdx][j++] = readLikelihood; } @@ -579,6 +619,28 @@ public class PairHMMIndelErrorModel { return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods); } + private int computeFirstDifferingPosition(byte[] b1, byte[] b2) { + if (b1.length != b2.length) + return 0; // sanity check + + for (int i=0; i < b1.length; i++ ){ + if ( b1[i]!= b2[i]) + return i; + } + return 0; // sanity check + } + + private int computeFirstDifferingPosition(double[] b1, double[] b2) { + if (b1.length != b2.length) + return 0; // sanity check + + for (int i=0; i < b1.length; i++ ){ + if ( b1[i]!= b2[i]) + return i; + } + return 0; // sanity check + } + private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) { final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index e117454f9..e10334a77 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -2,9 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.NGSPlatform; import 
org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; +import java.util.EnumSet; import java.util.List; /* @@ -46,6 +49,9 @@ import java.util.List; */ public class CycleCovariate implements StandardCovariate { + private final static EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS); + private final static EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); + // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { if( RAC.DEFAULT_PLATFORM != null ) { @@ -58,122 +64,6 @@ public class CycleCovariate implements StandardCovariate { } } - /* - // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final SAMRecord read, final int offset ) { - - int cycle = 1; - - //----------------------------- - // ILLUMINA and SOLID - //----------------------------- - - if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX" - read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID" - cycle = offset + 1; - if( read.getReadNegativeStrandFlag() ) { - cycle = read.getReadLength() - offset; - } - } - - //----------------------------- - // 454 - //----------------------------- - - else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454" - final byte[] bases = read.getReadBases(); - - // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though 
the nucleotide didn't change - // For example, AAAAAAA was probably read in two flow cycles but here we count it as one - if( !read.getReadNegativeStrandFlag() ) { // Forward direction - int iii = 0; - while( iii <= offset ) - { - while( iii <= offset && bases[iii] == (byte)'T' ) { iii++; } - while( iii <= offset && bases[iii] == (byte)'A' ) { iii++; } - while( iii <= offset && bases[iii] == (byte)'C' ) { iii++; } - while( iii <= offset && bases[iii] == (byte)'G' ) { iii++; } - if( iii <= offset ) { cycle++; } - if( iii <= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii++; } - - } - } else { // Negative direction - int iii = bases.length-1; - while( iii >= offset ) - { - while( iii >= offset && bases[iii] == (byte)'T' ) { iii--; } - while( iii >= offset && bases[iii] == (byte)'A' ) { iii--; } - while( iii >= offset && bases[iii] == (byte)'C' ) { iii--; } - while( iii >= offset && bases[iii] == (byte)'G' ) { iii--; } - if( iii >= offset ) { cycle++; } - if( iii >= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii--; } - } - } - } - - //----------------------------- - // SOLID (unused), only to be used in conjunction with PrimerRoundCovariate - //----------------------------- - - //else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { - // // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf - // int pos = offset + 1; - // if( read.getReadNegativeStrandFlag() ) { - // pos = read.getReadLength() - offset; - // } - // cycle = pos / 5; // integer division - //} - - //----------------------------- - // UNRECOGNIZED PLATFORM - //----------------------------- - - else { // Platform is unrecognized so revert to the default platform but warn the user first - if( defaultPlatform != null) { // The user set a default platform - if( !warnedUserBadPlatform ) { - Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in 
CycleCovariate. " + - "Defaulting to platform = " + defaultPlatform + "." ); - } - warnedUserBadPlatform = true; - - read.getReadGroup().setPlatform( defaultPlatform ); - return getValue( read, offset ); // A recursive call - } else { // The user did not set a default platform - throw new StingException( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + - "No default platform specified. Users must set the default platform using the --default_platform argument." ); - } - } - - // Differentiate between first and second of pair. - // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group - // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair. - // Therefore the cycle covariate must differentiate between first and second of pair reads. - // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because - // the current sequential model would consider the effects independently instead of jointly. 
- if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) { - cycle *= -1; - } - - return cycle; - } - */ - - // todo -- this should be put into a common place in the code base - private static List ILLUMINA_NAMES = Arrays.asList("ILLUMINA", "SLX", "SOLEXA"); - private static List SOLID_NAMES = Arrays.asList("SOLID"); - private static List LS454_NAMES = Arrays.asList("454"); - private static List COMPLETE_GENOMICS_NAMES = Arrays.asList("COMPLETE"); - private static List PACBIO_NAMES = Arrays.asList("PACBIO"); - private static List ION_TORRENT_NAMES = Arrays.asList("IONTORRENT"); - - private static boolean isPlatform(SAMRecord read, List names) { - String pl = read.getReadGroup().getPlatform().toUpperCase(); - for ( String name : names ) - if ( pl.contains( name ) ) - return true; - return false; - } - // Used to pick out the covariate's value from attributes of the read public void getValues(SAMRecord read, Comparable[] comparable) { @@ -181,7 +71,8 @@ public class CycleCovariate implements StandardCovariate { // Illumina, Solid, PacBio, and Complete Genomics //----------------------------- - if( isPlatform(read, ILLUMINA_NAMES) || isPlatform(read, SOLID_NAMES) || isPlatform(read, PACBIO_NAMES) || isPlatform(read, COMPLETE_GENOMICS_NAMES) ) { + final NGSPlatform ngsPlatform = ((GATKSAMRecord)read).getNGSPlatform(); + if( DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform) ) { final int init; final int increment; if( !read.getReadNegativeStrandFlag() ) { @@ -227,8 +118,7 @@ public class CycleCovariate implements StandardCovariate { //----------------------------- // 454 and Ion Torrent //----------------------------- - - else if ( isPlatform(read, LS454_NAMES) || isPlatform(read, ION_TORRENT_NAMES)) { // Some bams have "LS454" and others have just "454" + else if( FLOW_CYCLE_PLATFORMS.contains(ngsPlatform) ) { final int readLength = read.getReadLength(); final byte[] bases = read.getReadBases(); @@ -273,8 +163,6 @@ public class CycleCovariate implements 
StandardCovariate { else { throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform()); } - - } // Used to get the covariate's value from input csv file in TableRecalibrationWalker diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 2daa8c025..a0c928afa 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; @@ -228,8 +229,7 @@ public class RecalDataManager { * @param RAC The list of shared command line arguments */ public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) { - - SAMReadGroupRecord readGroup = read.getReadGroup(); + GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord)read).getReadGroup(); // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments if( readGroup == null ) { @@ -241,7 +241,7 @@ public class RecalDataManager { warnUserNullReadGroup = true; } // There is no readGroup so defaulting to these values - readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); + readGroup = new GATKSAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); ((GATKSAMRecord)read).setReadGroup( readGroup ); } else { @@ -251,7 +251,7 @@ 
public class RecalDataManager { if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user final String oldPlatform = readGroup.getPlatform(); - readGroup = new SAMReadGroupRecord( RAC.FORCE_READ_GROUP ); + readGroup = new GATKSAMReadGroupRecord( RAC.FORCE_READ_GROUP ); readGroup.setPlatform( oldPlatform ); ((GATKSAMRecord)read).setReadGroup( readGroup ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java new file mode 100644 index 000000000..1b9513b9a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java @@ -0,0 +1,52 @@ +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stratifies the eval RODs by the indel size + * + * Indel sizes are stratified from sizes -100 to +100. 
Sizes greater than this are lumped in the +/- 100 bin + * This stratification ignores multi-allelic indels (whose size is not defined uniquely) + */ +public class IndelSize extends VariantStratifier { + static final int MAX_INDEL_SIZE = 100; + @Override + public void initialize() { + states = new ArrayList(); + for( int a=-MAX_INDEL_SIZE; a <=MAX_INDEL_SIZE; a++ ) { + states.add(String.format("%d", a)); + } + } + + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + if (eval != null && eval.isIndel() && eval.isBiallelic()) { + try { + int eventLength = 0; + if ( eval.isSimpleInsertion() ) { + eventLength = eval.getAlternateAllele(0).length(); + } else if ( eval.isSimpleDeletion() ) { + eventLength = -eval.getReference().length(); + } + + if (eventLength > MAX_INDEL_SIZE) + eventLength = MAX_INDEL_SIZE; + else if (eventLength < -MAX_INDEL_SIZE) + eventLength = -MAX_INDEL_SIZE; + + relevantStates.add(String.format("%d",eventLength)); + } catch (Exception e) { + return relevantStates; + } + } + + return relevantStates; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java new file mode 100644 index 000000000..4f01f2b7a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils; + +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; + +/** + * A canonical, master list of the standard NGS platforms. These values + * can be obtained (efficiently) from a GATKSAMRecord object with the + * getNGSPlatform method. + * + * @author Mark DePristo + * @since 2011 + */ +public enum NGSPlatform { + ILLUMINA("ILLUMINA", "SLX", "SOLEXA"), + SOLID("SOLID"), + LS454("454"), + COMPLETE_GENOMICS("COMPLETE"), + PACBIO("PACBIO"), + ION_TORRENT("IONTORRENT"), + UNKNOWN("UNKNOWN"); + + /** + * Array of the prefix names in a BAM file for each of the platforms. + */ + private final String[] BAM_PL_NAMES; + + NGSPlatform(final String... BAM_PL_NAMES) { + for ( int i = 0; i < BAM_PL_NAMES.length; i++ ) + BAM_PL_NAMES[i] = BAM_PL_NAMES[i].toUpperCase(); + this.BAM_PL_NAMES = BAM_PL_NAMES; + } + + /** + * Returns a representative PL string for this platform + * @return + */ + public final String getDefaultPlatform() { + return BAM_PL_NAMES[0]; + } + + /** + * Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord. + * Note you should not use this function if you have a GATKSAMRecord -- use the + * accessor method instead. 
+ * + * @param read + * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match + */ + public static final NGSPlatform fromRead(SAMRecord read) { + return fromReadGroup(read.getReadGroup()); + } + + /** + * Returns the NGSPlatform corresponding to the PL tag in the read group + * @param rg + * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match + */ + public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) { + return fromReadGroupPL(rg.getPlatform()); + } + + /** + * Returns the NGSPlatform corresponding to the PL tag in the read group + * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object + * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match + */ + public static final NGSPlatform fromReadGroupPL(final String plFromRG) { + if ( plFromRG == null ) return UNKNOWN; + + // todo -- algorithm could be implemented more efficiently, as the list of all + // todo -- names is known upfront, so a decision tree could be used to identify + // todo -- a prefix common to PL + final String pl = plFromRG.toUpperCase(); + for ( final NGSPlatform ngsPlatform : NGSPlatform.values() ) { + for ( final String bamPLName : ngsPlatform.BAM_PL_NAMES ) { + if ( pl.contains(bamPLName) ) + return ngsPlatform; + } + } + + return UNKNOWN; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index 58f7942fe..9180447b9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -25,35 +25,35 @@ package org.broadinstitute.sting.utils.R; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.Advanced; import 
org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; -import org.broadinstitute.sting.utils.PathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.IOUtils; +import org.broadinstitute.sting.utils.io.Resource; +import org.broadinstitute.sting.utils.runtime.ProcessController; +import org.broadinstitute.sting.utils.runtime.ProcessSettings; import java.io.File; -import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** - * Generic service for executing RScripts in the GATK directory - * - * @author Your Name - * @since Date created + * Generic service for executing RScripts */ public class RScriptExecutor { /** * our log */ - protected static Logger logger = Logger.getLogger(RScriptExecutor.class); + private static Logger logger = Logger.getLogger(RScriptExecutor.class); public static class RScriptArgumentCollection { @Advanced - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false) + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. Defaults Rscript meaning to use the first available on the environment PATH. 
For Broad users should 'use R-2.12' or later.", required = false) public String PATH_TO_RSCRIPT = "Rscript"; @Advanced @@ -62,40 +62,119 @@ public class RScriptExecutor { public RScriptArgumentCollection() {} - /** For testing and convenience */ + /* For testing and convenience */ public RScriptArgumentCollection(final String PATH_TO_RSCRIPT, final List PATH_TO_RESOURCES) { this.PATH_TO_RSCRIPT = PATH_TO_RSCRIPT; this.PATH_TO_RESOURCES = PATH_TO_RESOURCES; } } - final RScriptArgumentCollection myArgs; - final boolean exceptOnError; + private final RScriptArgumentCollection myArgs; + private final boolean exceptOnError; + private final List libraries = new ArrayList(); + private final List scriptResources = new ArrayList(); + private final List scriptFiles = new ArrayList(); + private final List args = new ArrayList(); public RScriptExecutor(final RScriptArgumentCollection myArgs, final boolean exceptOnError) { this.myArgs = myArgs; this.exceptOnError = exceptOnError; } - public void callRScripts(String scriptName, Object... scriptArgs) { - callRScripts(scriptName, Arrays.asList(scriptArgs)); + public void addLibrary(RScriptLibrary library) { + this.libraries.add(library); } - public void callRScripts(String scriptName, List scriptArgs) { + public void addScript(Resource script) { + this.scriptResources.add(script); + } + + public void addScript(File script) { + this.scriptFiles.add(script); + } + + /** + * Adds args to the end of the Rscript command line. + * @param args the args. + * @throws NullPointerException if any of the args are null. + */ + public void addArgs(Object... 
args) { + for (Object arg: args) + this.args.add(arg.toString()); + } + + public void exec() { + List tempFiles = new ArrayList(); try { - final File pathToScript = findScript(scriptName); - if ( pathToScript == null ) return; // we failed but shouldn't exception out - final String argString = Utils.join(" ", scriptArgs); - final String cmdLine = Utils.join(" ", Arrays.asList(myArgs.PATH_TO_RSCRIPT, pathToScript, argString)); - logger.info("Executing RScript: " + cmdLine); - Runtime.getRuntime().exec(cmdLine).waitFor(); - } catch (InterruptedException e) { + File tempLibDir = IOUtils.tempDir("R.", ".lib"); + tempFiles.add(tempLibDir); + + StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibDir).append("';"); + + if (this.libraries.size() > 0) { + List tempLibraryPaths = new ArrayList(); + for (RScriptLibrary library: this.libraries) { + File tempLibrary = library.writeTemp(); + tempFiles.add(tempLibrary); + tempLibraryPaths.add(tempLibrary.getAbsolutePath()); + } + + expression.append("install.packages("); + expression.append("pkgs=c('").append(StringUtils.join(tempLibraryPaths, "', '")).append("'), lib=tempLibDir, repos=NULL, type='source', "); + // Install faster by eliminating cruft. 
+ expression.append("INSTALL_opts=c('--no-libs', '--no-data', '--no-help', '--no-demo', '--no-exec')"); + expression.append(");"); + + for (RScriptLibrary library: this.libraries) { + expression.append("require('").append(library.getLibraryName()).append("', lib.loc=tempLibDir);"); + } + } + + for (Resource script: this.scriptResources) { + File tempScript = IOUtils.writeTempResource(script); + tempFiles.add(tempScript); + expression.append("source('").append(tempScript.getAbsolutePath()).append("');"); + } + + for (File script: this.scriptFiles) { + expression.append("source('").append(script.getAbsolutePath()).append("');"); + } + + String[] cmd = new String[this.args.size() + 3]; + int i = 0; + cmd[i++] = myArgs.PATH_TO_RSCRIPT; + cmd[i++] = "-e"; + cmd[i++] = expression.toString(); + for (String arg: this.args) + cmd[i++] = arg; + + ProcessSettings processSettings = new ProcessSettings(cmd); + if (logger.isDebugEnabled()) { + processSettings.getStdoutSettings().printStandard(true); + processSettings.getStderrSettings().printStandard(true); + } + + ProcessController controller = ProcessController.getThreadLocal(); + + logger.debug("Executing: " + Utils.join(" ", cmd)); + logger.debug("Result: " + controller.exec(processSettings).getExitValue()); + + } catch (StingException e) { generateException(e); - } catch (IOException e) { - generateException("Fatal Exception: Perhaps RScript jobs are being spawned too quickly?", e); + } finally { + for (File temp: tempFiles) + FileUtils.deleteQuietly(temp); } } + public void callRScripts(String scriptName, Object... 
scriptArgs) { + final File pathToScript = findScript(scriptName); + if (pathToScript == null) return; // we failed but shouldn't exception out + addScript(pathToScript); + addArgs(scriptArgs); + exec(); + } + public File findScript(final String scriptName) { for ( String pathToResource : myArgs.PATH_TO_RESOURCES ) { final File f = new File(pathToResource + "/" + scriptName); diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptLibrary.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptLibrary.java new file mode 100644 index 000000000..60cd7504b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptLibrary.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.R; + +import org.broadinstitute.sting.utils.io.IOUtils; +import org.broadinstitute.sting.utils.io.Resource; + +import java.io.File; + +/** + * Libraries embedded in the StingUtils package. + */ +public enum RScriptLibrary { + GSALIB("gsalib"); + + private final String name; + + private RScriptLibrary(String name) { + this.name = name; + } + + public String getLibraryName() { + return this.name; + } + + public String getResourcePath() { + return name + ".tar.gz"; + } + + /** + * Writes the library source code to a temporary tar.gz file and returns the path. + * @return The path to the library source code. The caller must delete the code when done. + */ + public File writeTemp() { + return IOUtils.writeTempResource(new Resource(getResourcePath(), RScriptLibrary.class)); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index a9d71ef98..25ef8ccd2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -29,6 +29,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.ArgumentDefinition; import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; import org.broadinstitute.sting.commandline.ArgumentDefinitions; +import org.broadinstitute.sting.commandline.ArgumentMatchSource; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; @@ -47,6 +48,7 @@ public class HelpFormatter { /** * Prints the help, given a collection of argument definitions. + * @param applicationDetails Application details * @param argumentDefinitions Argument definitions for which help should be printed. 
*/ public void printHelp( ApplicationDetails applicationDetails, ArgumentDefinitions argumentDefinitions ) { @@ -233,7 +235,7 @@ public class HelpFormatter { private List prepareArgumentGroups( ArgumentDefinitions argumentDefinitions ) { // Sort the list of argument definitions according to how they should be shown. // Put the sorted results into a new cloned data structure. - Comparator definitionComparator = new Comparator() { + Comparator definitionComparator = new Comparator() { public int compare( ArgumentDefinition lhs, ArgumentDefinition rhs ) { if( lhs.required && rhs.required ) return 0; if( lhs.required ) return -1; @@ -242,15 +244,15 @@ public class HelpFormatter { } }; - List argumentGroups = new ArrayList(); + List argumentGroups = new ArrayList(); for( ArgumentDefinitionGroup argumentGroup: argumentDefinitions.getArgumentDefinitionGroups() ) { - List sortedDefinitions = new ArrayList( argumentGroup.argumentDefinitions ); + List sortedDefinitions = new ArrayList( argumentGroup.argumentDefinitions ); Collections.sort( sortedDefinitions, definitionComparator ); argumentGroups.add( new ArgumentDefinitionGroup(argumentGroup.groupName,sortedDefinitions) ); } // Sort the argument groups themselves with main arguments first, followed by plugins sorted in name order. - Comparator groupComparator = new Comparator() { + Comparator groupComparator = new Comparator() { public int compare( ArgumentDefinitionGroup lhs, ArgumentDefinitionGroup rhs ) { if( lhs.groupName == null && rhs.groupName == null ) return 0; if( lhs.groupName == null ) return -1; @@ -271,9 +273,9 @@ public class HelpFormatter { * Generate a standard header for the logger * * @param applicationDetails details of the application to run. 
- * @param args the command line arguments passed in + * @param parsedArgs the command line arguments passed in */ - public static void generateHeaderInformation(ApplicationDetails applicationDetails, String[] args) { + public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map> parsedArgs) { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); java.util.Date date = new java.util.Date(); @@ -283,11 +285,22 @@ public class HelpFormatter { logger.info(barrier); for (String headerLine : applicationDetails.applicationHeader) logger.info(headerLine); - String output = ""; - for (String str : args) { - output = output + str + " "; + logger.debug("Current directory: " + System.getProperty("user.dir")); + for (Map.Entry> entry: parsedArgs.entrySet()) { + ArgumentMatchSource matchSource = entry.getKey(); + final String sourceName; + switch (matchSource.getType()) { + case CommandLine: sourceName = "Program"; break; + case File: sourceName = matchSource.getFile().getPath(); break; + default: throw new RuntimeException("Unexpected argument match source type: " + matchSource.getType()); + } + + String output = sourceName + " Args:"; + for (String str : entry.getValue()) { + output = output + " " + str; + } + logger.info(output); } - logger.info("Program Args: " + output); logger.info("Date/Time: " + dateFormat.format(date)); logger.info(barrier); diff --git a/public/java/src/org/broadinstitute/sting/utils/io/FileExtension.java b/public/java/src/org/broadinstitute/sting/utils/io/FileExtension.java new file mode 100644 index 000000000..cd69ee126 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/FileExtension.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation 
the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.io; + +import java.io.File; + +public interface FileExtension { + /** + * Returns a clone of the FileExtension with a new path. + * @param path New path. 
+ * @return New FileExtension + */ + public File withPath(String path); +} diff --git a/public/java/src/org/broadinstitute/sting/utils/io/HardThresholdingOutputStream.java b/public/java/src/org/broadinstitute/sting/utils/io/HardThresholdingOutputStream.java new file mode 100755 index 000000000..26b5ae6fd --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/HardThresholdingOutputStream.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.utils.io; + +import org.apache.commons.io.output.ThresholdingOutputStream; + +import java.io.IOException; + +/** + * An output stream which stops at the threshold + * instead of potentially triggering early. 
+ */ +public abstract class HardThresholdingOutputStream extends ThresholdingOutputStream { + protected HardThresholdingOutputStream(int threshold) { + super(threshold); + } + + @Override + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + int remaining = this.getThreshold() - (int)this.getByteCount(); + if (!isThresholdExceeded() && len > remaining) { + super.write(b, off, remaining); + super.write(b, off + remaining, len - remaining); + } else { + super.write(b, off, len); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java new file mode 100644 index 000000000..7bfaa0194 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.io; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.io.LineIterator; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.*; +import java.util.*; + +public class IOUtils { + private static Logger logger = Logger.getLogger(IOUtils.class); + + /** + * Checks if the temp directory has been set up and throws an exception if the user hasn't set it correctly. + * + * @param tempDir Temporary directory. + */ + public static void checkTempDir(File tempDir) { + String tempDirPath = tempDir.getAbsolutePath(); + // Keeps the user from leaving the temp directory as the default, and on Macs from having pluses + // in the path which can cause problems with the Google Reflections library. + // see also: http://benjchristensen.com/2009/09/22/mac-osx-10-6-java-java-io-tmpdir/ + if (tempDirPath.startsWith("/var/folders/") || (tempDirPath.equals("/tmp")) || (tempDirPath.equals("/tmp/"))) + throw new UserException.BadTmpDir("java.io.tmpdir must be explicitly set"); + if (!tempDir.exists() && !tempDir.mkdirs()) + throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath()); + } + + /** + * Creates a temp directory with the prefix and optional suffix. + * + * @param prefix Prefix for the directory name. + * @param suffix Optional suffix for the directory name. + * @return The created temporary directory. 
+ */ + public static File tempDir(String prefix, String suffix) { + return tempDir(prefix, suffix, null); + } + + /** + * Creates a temp directory with the prefix and optional suffix. + * + * @param prefix Prefix for the directory name. + * @param suffix Optional suffix for the directory name. + * @param tempDirParent Parent directory for the temp directory. + * @return The created temporary directory. + */ + public static File tempDir(String prefix, String suffix, File tempDirParent) { + try { + if (tempDirParent == null) + tempDirParent = FileUtils.getTempDirectory(); + if (!tempDirParent.exists() && !tempDirParent.mkdirs()) + throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent); + File temp = File.createTempFile(prefix + "-", suffix, tempDirParent); + if (!temp.delete()) + throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath()); + if (!temp.mkdir()) + throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath()); + return absolute(temp); + } catch (IOException e) { + throw new UserException.BadTmpDir(e.getMessage()); + } + } + + /** + * Writes content to a temp file and returns the path to the temporary file. + * + * @param content to write. + * @param prefix Prefix for the temp file. + * @param suffix Suffix for the temp file. + * @param directory Directory for the temp file. + * @return the path to the temp file. + */ + public static File writeTempFile(String content, String prefix, String suffix, File directory) { + try { + File tempFile = absolute(File.createTempFile(prefix, suffix, directory)); + FileUtils.writeStringToFile(tempFile, content); + return tempFile; + } catch (IOException e) { + throw new UserException.BadTmpDir(e.getMessage()); + } + } + + /** + * Waits for NFS to propagate a file creation, imposing a timeout. + * + * Based on Apache Commons IO FileUtils.waitFor() + * + * @param file The file to wait for. 
+ * @param seconds The maximum time in seconds to wait. + * @return true if the file exists + */ + public static boolean waitFor(File file, int seconds) { + return waitFor(Collections.singletonList(file), seconds).isEmpty(); + } + + /** + * Waits for NFS to propagate a file creation, imposing a timeout. + * + * Based on Apache Commons IO FileUtils.waitFor() + * + * @param files The list of files to wait for. + * @param seconds The maximum time in seconds to wait. + * @return Files that still do not exist at the end of the timeout, or an empty list if all files exist. + */ + public static List waitFor(Collection files, int seconds) { + long timeout = 0; + long tick = 0; + List missingFiles = new ArrayList(); + for (File file : files) + if (!file.exists()) + missingFiles.add(file); + + while (!missingFiles.isEmpty() && timeout <= seconds) { + if (tick >= 10) { + tick = 0; + timeout++; + } + tick++; + try { + Thread.sleep(100); + } catch (InterruptedException ignore) { + } + List newMissingFiles = new ArrayList(); + for (File file : missingFiles) + if (!file.exists()) + newMissingFiles.add(file); + missingFiles = newMissingFiles; + } + return missingFiles; + } + + /** + * Returns the directory at the number of levels deep. + * For example 2 levels of /path/to/dir will return /path/to + * + * @param dir Directory path. + * @param level how many levels deep from the root. + * @return The path to the parent directory that is level-levels deep. + */ + public static File dirLevel(File dir, int level) { + List directories = new ArrayList(); + File parentDir = absolute(dir); + while (parentDir != null) { + directories.add(0, parentDir); + parentDir = parentDir.getParentFile(); + } + if (directories.size() <= level) + return directories.get(directories.size() - 1); + else + return directories.get(level); + } + + /** + * Returns the sub path rooted at the parent. + * + * @param parent The parent directory. 
+ * @param path The sub path to append to the parent, if the path is not absolute. + * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path. + */ + public static File absolute(File parent, String path) { + return absolute(parent, new File(path)); + } + + /** + * Returns the sub path rooted at the parent. + * + * @param parent The parent directory. + * @param file The sub path to append to the parent, if the path is not absolute. + * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path. + */ + public static File absolute(File parent, File file) { + String newPath; + if (file.isAbsolute()) + newPath = absolutePath(file); + else + newPath = absolutePath(new File(parent, file.getPath())); + return replacePath(file, newPath); + } + + /** + * A mix of getCanonicalFile and getAbsoluteFile that returns the + * absolute path to the file without dereferencing symbolic links. + * + * @param file the file. + * @return the absolute path to the file. + */ + public static File absolute(File file) { + return replacePath(file, absolutePath(file)); + } + + private static String absolutePath(File file) { + File fileAbs = file.getAbsoluteFile(); + LinkedList names = new LinkedList(); + while (fileAbs != null) { + String name = fileAbs.getName(); + fileAbs = fileAbs.getParentFile(); + + if (".".equals(name)) { + /* skip */ + + /* TODO: What do we do for ".."? + } else if (name == "..") { + + CentOS tcsh says use getCanonicalFile: + ~ $ mkdir -p test1/test2 + ~ $ ln -s test1/test2 test3 + ~ $ cd test3/.. + ~/test1 $ + + Mac bash says keep going with getAbsoluteFile: + ~ $ mkdir -p test1/test2 + ~ $ ln -s test1/test2 test3 + ~ $ cd test3/.. + ~ $ + + For now, leave it and let the shell figure it out. 
+ */ + } else { + names.add(0, name); + } + } + + return ("/" + StringUtils.join(names, "/")); + } + + private static File replacePath(File file, String path) { + if (file instanceof FileExtension) + return ((FileExtension)file).withPath(path); + if (!File.class.equals(file.getClass())) + throw new StingException("Sub classes of java.io.File must also implement FileExtension"); + return new File(path); + } + + /** + * Returns the last lines of the file. + * NOTE: This is only safe to run on smaller files! + * + * @param file File to read. + * @param count Maximum number of lines to return. + * @return The last count lines from file. + * @throws IOException When unable to read the file. + */ + public static List tail(File file, int count) throws IOException { + LinkedList tailLines = new LinkedList(); + FileReader reader = new FileReader(file); + try { + LineIterator iterator = org.apache.commons.io.IOUtils.lineIterator(reader); + int lineCount = 0; + while (iterator.hasNext()) { + String line = iterator.nextLine(); + lineCount++; + if (lineCount > count) + tailLines.removeFirst(); + tailLines.offer(line); + } + } finally { + org.apache.commons.io.IOUtils.closeQuietly(reader); + } + return tailLines; + } + + /** + * Tries to delete a file. Emits a warning if the file could not be deleted. + * + * @param file File to delete. + * @return true if the file was deleted. + */ + public static boolean tryDelete(File file) { + boolean deleted = FileUtils.deleteQuietly(file); + if (deleted) + logger.debug("Deleted " + file); + else if (file.exists()) + logger.warn("Unable to delete " + file); + return deleted; + } + + /** + * Writes an embedded resource to a temp file. + * File is not scheduled for deletion and must be cleaned up by the caller. + * @param resource Embedded resource. + * @return Path to the temp file with the contents of the resource. 
+ */ + public static File writeTempResource(Resource resource) { + File temp; + try { + temp = File.createTempFile(FilenameUtils.getBaseName(resource.getPath()) + ".", "." + FilenameUtils.getExtension(resource.getPath())); + } catch (IOException e) { + throw new UserException.BadTmpDir(e.getMessage()); + } + writeResource(resource, temp); + return temp; + } + + /** + * Writes an embedded resource to a file. + * File is not scheduled for deletion and must be cleaned up by the caller. + * @param resource Embedded resource. + * @param file File path to write. + */ + public static void writeResource(Resource resource, File file) { + String path = resource.getPath(); + Class clazz = resource.getRelativeClass(); + InputStream inputStream = null; + OutputStream outputStream = null; + try { + if (clazz == null) { + inputStream = ClassLoader.getSystemResourceAsStream(path); + if (inputStream == null) + throw new IllegalArgumentException("Resource not found: " + path); + } else { + inputStream = clazz.getResourceAsStream(path); + if (inputStream == null) + throw new IllegalArgumentException("Resource not found relative to " + clazz + ": " + path); + } + outputStream = FileUtils.openOutputStream(file); + org.apache.commons.io.IOUtils.copy(inputStream, outputStream); + } catch (IOException e) { + throw new StingException(String.format("Unable to copy resource '%s' to '%s'", path, file), e); + } finally { + org.apache.commons.io.IOUtils.closeQuietly(inputStream); + org.apache.commons.io.IOUtils.closeQuietly(outputStream); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/io/Resource.java b/public/java/src/org/broadinstitute/sting/utils/io/Resource.java new file mode 100644 index 000000000..5473511b4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/Resource.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and 
associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.io; + +/** + * Stores a resource by path and a relative class. + */ +public class Resource { + private final String path; + private final Class relativeClass; + + /** + * Create a resource with a path and a relative class. + * @param path Relative or absolute path to the class. + * @param relativeClass Relative class to use as a class loader and for a relative package. + * + * If the relative class is null then the system classloader will be used and the path must be absolute. 
+ */ + public Resource(String path, Class relativeClass) { + this.path = path; + this.relativeClass = relativeClass; + } + + public Class getRelativeClass() { + return relativeClass; + } + + public String getPath() { + return path; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index f7d237401..4eda7c7cd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.utils.pileup; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; + +import java.util.*; /** * An easy to access fragment-based pileup, which contains two separate pileups. The first @@ -13,31 +14,51 @@ import java.util.Map; * * Based on the original code by E. Banks * - * TODO -- technically we could generalize this code to support a pseudo-duplicate marking - * TODO -- algorithm that could collect all duplicates into a single super pileup element + * Oct 21: note that the order of the oneReadPileup and twoReadPileups are not + * defined. The algorithms that produce these lists are in fact producing + * lists of Pileup elements *NOT* sorted by alignment start position of the underlying + * reads. 
* * User: depristo * Date: 3/26/11 * Time: 10:09 PM */ public class FragmentPileup { - final Collection oneReadPile; - final Collection twoReadPile = new ArrayList(); + Collection oneReadPile = null; + Collection twoReadPile = null; + + protected enum FragmentMatchingAlgorithm { + ORIGINAL, + skipNonOverlapping, + } /** * Create a new Fragment-based pileup from the standard read-based pileup * @param pileup */ public FragmentPileup(ReadBackedPileup pileup) { - Map nameMap = new HashMap(); + skipNonOverlapping(pileup); + } + + /** For performance testing only */ + protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) { + switch ( algorithm ) { + case ORIGINAL: oldSlowCalculation(pileup); break; + case skipNonOverlapping: skipNonOverlapping(pileup); break; + } + } + + private final void oldSlowCalculation(final ReadBackedPileup pileup) { + final Map nameMap = new HashMap(pileup.size()); // build an initial map, grabbing all of the multi-read fragments - for ( PileupElement p : pileup ) { - String readName = p.getRead().getReadName(); + for ( final PileupElement p : pileup ) { + final String readName = p.getRead().getReadName(); - PileupElement pe1 = nameMap.get(readName); + final PileupElement pe1 = nameMap.get(readName); if ( pe1 != null ) { // assumes we have at most 2 reads per fragment + if ( twoReadPile == null ) twoReadPile = new ArrayList(); twoReadPile.add(new TwoReadPileupElement(pe1, p)); nameMap.remove(readName); } else { @@ -45,17 +66,54 @@ public class FragmentPileup { } } - // now set the one Read pile to the values in the nameMap with only a single read oneReadPile = nameMap.values(); } + private final void skipNonOverlapping(final ReadBackedPileup pileup) { + Map nameMap = null; + + // build an initial map, grabbing all of the multi-read fragments + for ( final PileupElement p : pileup ) { + final SAMRecord read = p.getRead(); + final int mateStart = read.getMateAlignmentStart(); + + if ( mateStart == 0 || mateStart > 
read.getAlignmentEnd() ) { + // if we know that this read won't overlap its mate, or doesn't have one, jump out early + if ( oneReadPile == null ) oneReadPile = new ArrayList(pileup.size()); // lazy init + oneReadPile.add(p); + } else { + // the read might overlap its mate, or is the rightmost read of a pair + final String readName = p.getRead().getReadName(); + final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + if ( twoReadPile == null ) twoReadPile = new ArrayList(); // lazy init + twoReadPile.add(new TwoReadPileupElement(pe1, p)); + nameMap.remove(readName); + } else { + if ( nameMap == null ) nameMap = new HashMap(pileup.size()); // lazy init + nameMap.put(readName, p); + } + } + } + + // add all of the reads that are potentially overlapping but whose mate never showed + // up to the oneReadPile + if ( nameMap != null && ! nameMap.isEmpty() ) { + if ( oneReadPile == null ) + oneReadPile = nameMap.values(); + else + oneReadPile.addAll(nameMap.values()); + } + } + /** * Gets the pileup elements containing two reads, in no particular order * * @return */ public Collection getTwoReadPileup() { - return twoReadPile; + return twoReadPile == null ? Collections.emptyList() : twoReadPile; } /** @@ -64,7 +122,7 @@ public class FragmentPileup { * @return */ public Collection getOneReadPileup() { - return oneReadPile; + return oneReadPile == null ? 
Collections.emptyList() : oneReadPile; } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index f6ed792a5..3d6b6f4b9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -4,6 +4,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; /** @@ -12,7 +13,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; * Date: Apr 14, 2009 * Time: 8:54:05 AM */ -public class PileupElement { +public class PileupElement implements Comparable { public static final byte DELETION_BASE = BaseUtils.D; public static final byte DELETION_QUAL = (byte) 16; public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87; @@ -75,6 +76,20 @@ public class PileupElement { return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; } + @Override + public int compareTo(final PileupElement pileupElement) { + if ( offset < pileupElement.offset ) + return -1; + else if ( offset > pileupElement.offset ) + return 1; + else if ( read.getAlignmentStart() < pileupElement.read.getAlignmentStart() ) + return -1; + else if ( read.getAlignmentStart() > pileupElement.read.getAlignmentStart() ) + return 1; + else + return 0; + } + // -------------------------------------------------------------------------- // // Reduced read accessors @@ -82,16 +97,16 @@ public class PileupElement { // -------------------------------------------------------------------------- public boolean isReducedRead() { - return ReadUtils.isReducedRead(getRead()); + return ((GATKSAMRecord)read).isReducedRead(); } public int getReducedCount() { if ( ! 
isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName()); - return ReadUtils.getReducedCount(getRead(), offset); + return ((GATKSAMRecord)read).getReducedCount(offset); } public byte getReducedQual() { if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName()); - return ReadUtils.getReducedQual(getRead(), offset); + return getQual(); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/CapturedStreamOutput.java b/public/java/src/org/broadinstitute/sting/utils/runtime/CapturedStreamOutput.java new file mode 100755 index 000000000..50622cef1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/CapturedStreamOutput.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.NullOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.HardThresholdingOutputStream; + +import java.io.*; +import java.util.EnumMap; + +/** + * Stream output captured from a stream. + */ +public class CapturedStreamOutput extends StreamOutput { + private final InputStream processStream; + private final EnumMap outputStreams = new EnumMap(StreamLocation.class); + + /** + * The byte stream to capture content or null if no output string content was requested. + */ + private final ByteArrayOutputStream bufferStream; + + /** + * True if the buffer is truncated. + */ + private boolean bufferTruncated = false; + + /** + * @param settings Settings that define what to capture. + * @param processStream Stream to capture output. + * @param standardStream Stream to write debug output. + */ + public CapturedStreamOutput(OutputStreamSettings settings, InputStream processStream, PrintStream standardStream) { + this.processStream = processStream; + int bufferSize = settings.getBufferSize(); + this.bufferStream = (bufferSize < 0) ? 
new ByteArrayOutputStream() : new ByteArrayOutputStream(bufferSize); + + for (StreamLocation location : settings.getStreamLocations()) { + OutputStream outputStream; + switch (location) { + case Buffer: + if (bufferSize < 0) { + outputStream = this.bufferStream; + } else { + outputStream = new HardThresholdingOutputStream(bufferSize) { + @Override + protected OutputStream getStream() throws IOException { + return bufferTruncated ? NullOutputStream.NULL_OUTPUT_STREAM : bufferStream; + } + + @Override + protected void thresholdReached() throws IOException { + bufferTruncated = true; + } + }; + } + break; + case File: + try { + outputStream = new FileOutputStream(settings.getOutputFile(), settings.isAppendFile()); + } catch (IOException e) { + throw new UserException.BadInput(e.getMessage()); + } + break; + case Standard: + outputStream = standardStream; + break; + default: + throw new ReviewedStingException("Unexpected stream location: " + location); + } + this.outputStreams.put(location, outputStream); + } + } + + @Override + public byte[] getBufferBytes() { + return bufferStream.toByteArray(); + } + + @Override + public boolean isBufferTruncated() { + return bufferTruncated; + } + + /** + * Drain the input stream to keep the process from backing up until it's empty. + * File streams will be closed automatically when this method returns. + * + * @throws java.io.IOException When unable to read or write. 
+ */ + public void readAndClose() throws IOException { + try { + byte[] buf = new byte[4096]; + int readCount; + while ((readCount = processStream.read(buf)) >= 0) + for (OutputStream outputStream : this.outputStreams.values()) { + outputStream.write(buf, 0, readCount); + } + } finally { + for (StreamLocation location : this.outputStreams.keySet()) { + OutputStream outputStream = this.outputStreams.get(location); + outputStream.flush(); + if (location != StreamLocation.Standard) + IOUtils.closeQuietly(outputStream); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/InputStreamSettings.java b/public/java/src/org/broadinstitute/sting/utils/runtime/InputStreamSettings.java new file mode 100755 index 000000000..dfa380a68 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/InputStreamSettings.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import java.io.File; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Set; + +/** + * Settings that define text to write to the process stdin. + */ +public class InputStreamSettings { + private final EnumSet streamLocations = EnumSet.noneOf(StreamLocation.class); + private byte[] inputBuffer; + private File inputFile; + + public InputStreamSettings() { + } + + /** + * @param inputBuffer String to write to stdin. + */ + public InputStreamSettings(String inputBuffer) { + setInputBuffer(inputBuffer); + } + + /** + * @param inputFile File to write to stdin. + */ + public InputStreamSettings(File inputFile) { + setInputFile(inputFile); + } + + /** + * @param inputBuffer String to write to stdin. + * @param inputFile File to write to stdin. 
+ */ + public InputStreamSettings(byte[] inputBuffer, File inputFile) { + setInputBuffer(inputBuffer); + setInputFile(inputFile); + } + + public Set getStreamLocations() { + return Collections.unmodifiableSet(streamLocations); + } + + public byte[] getInputBuffer() { + return inputBuffer; + } + + public void setInputBuffer(String inputBuffer) { + if (inputBuffer == null) + throw new IllegalArgumentException("inputBuffer cannot be null"); + this.streamLocations.add(StreamLocation.Buffer); + this.inputBuffer = inputBuffer.getBytes(); + } + + public void setInputBuffer(byte[] inputBuffer) { + if (inputBuffer == null) + throw new IllegalArgumentException("inputBuffer cannot be null"); + this.streamLocations.add(StreamLocation.Buffer); + this.inputBuffer = inputBuffer; + } + + public void clearInputBuffer() { + this.streamLocations.remove(StreamLocation.Buffer); + this.inputBuffer = null; + } + + public File getInputFile() { + return inputFile; + } + + public void setInputFile(File inputFile) { + if (inputFile == null) + throw new IllegalArgumentException("inputFile cannot be null"); + this.streamLocations.add(StreamLocation.File); + this.inputFile = inputFile; + } + + public void clearInputFile() { + this.streamLocations.remove(StreamLocation.File); + this.inputFile = null; + } + + public void setInputStandard(boolean inputStandard) { + if (inputStandard) + this.streamLocations.add(StreamLocation.Standard); + else + this.streamLocations.remove(StreamLocation.Standard); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/OutputStreamSettings.java b/public/java/src/org/broadinstitute/sting/utils/runtime/OutputStreamSettings.java new file mode 100755 index 000000000..468ece178 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/OutputStreamSettings.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and 
associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import java.io.File; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Set; + +/** + * Settings that define text to capture from a process stream. + */ +public class OutputStreamSettings { + private final EnumSet streamLocations = EnumSet.noneOf(StreamLocation.class); + private int bufferSize; + private File outputFile; + private boolean appendFile; + + public OutputStreamSettings() { + } + + /** + * @param bufferSize The number of bytes to capture, or -1 for unlimited. + */ + public OutputStreamSettings(int bufferSize) { + setBufferSize(bufferSize); + } + + /** + * @param outputFile The file to write output to. + */ + public OutputStreamSettings(File outputFile) { + setOutputFile(outputFile); + } + + /** + * @param outputFile The file to write output to. + * @param append true if the output file should be appended to. 
+ */ + public OutputStreamSettings(File outputFile, boolean append) { + setOutputFile(outputFile, append); + } + + public OutputStreamSettings(int bufferSize, File outputFile, boolean appendFile) { + setBufferSize(bufferSize); + setOutputFile(outputFile, appendFile); + } + + public Set getStreamLocations() { + return Collections.unmodifiableSet(streamLocations); + } + + public int getBufferSize() { + return bufferSize; + } + + public void setBufferSize(int bufferSize) { + this.streamLocations.add(StreamLocation.Buffer); + this.bufferSize = bufferSize; + } + + public void clearBufferSize() { + this.streamLocations.remove(StreamLocation.Buffer); + this.bufferSize = 0; + } + + public File getOutputFile() { + return outputFile; + } + + public boolean isAppendFile() { + return appendFile; + } + + /** + * Overwrites the outputFile with the process output. + * + * @param outputFile File to overwrite. + */ + public void setOutputFile(File outputFile) { + setOutputFile(outputFile, false); + } + + public void setOutputFile(File outputFile, boolean append) { + if (outputFile == null) + throw new IllegalArgumentException("outputFile cannot be null"); + streamLocations.add(StreamLocation.File); + this.outputFile = outputFile; + this.appendFile = append; + } + + public void clearOutputFile() { + streamLocations.remove(StreamLocation.File); + this.outputFile = null; + this.appendFile = false; + } + + public void printStandard(boolean print) { + if (print) + this.streamLocations.add(StreamLocation.Standard); + else + this.streamLocations.remove(StreamLocation.Standard); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessController.java b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessController.java new file mode 100755 index 000000000..6a3f9c753 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessController.java @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby 
granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.*; + +/** + * Facade to Runtime.exec() and java.lang.Process. Handles + * running a process to completion and returns stdout and stderr + * as strings. Creates separate threads for reading stdout and stderr, + * then reuses those threads for each process most efficient use is + * to create one of these and use it repeatedly. Instances are not + * thread-safe, however. 
+ * + * TODO: java.io sometimes zombies the backround threads locking up on read(). + * Supposedly NIO has better ways of interrupting a blocked stream but will + * require a little bit of refactoring. + * + * @author Michael Koehrsen + * @author Khalid Shakir + */ +public class ProcessController { + private static Logger logger = Logger.getLogger(ProcessController.class); + + private static enum ProcessStream {Stdout, Stderr} + + // Tracks running processes. + private static final Set running = Collections.synchronizedSet(new HashSet()); + + // Tracks this running process. + private Process process; + + // Threads that capture stdout and stderr + private final OutputCapture stdoutCapture; + private final OutputCapture stderrCapture; + + // When a caller destroyes a controller a new thread local version will be created + private boolean destroyed = false; + + // Communication channels with output capture threads + + // Holds the stdout and stderr sent to the background capture threads + private final Map toCapture = + new EnumMap(ProcessStream.class); + + // Holds the results of the capture from the background capture threads. + // May be the content via toCapture or an StreamOutput.EMPTY if the capture was interrupted. + private final Map fromCapture = + new EnumMap(ProcessStream.class); + + // Useful for debugging if background threads have shut down correctly + private static int nextControllerId = 0; + private final int controllerId; + + public ProcessController() { + // Start the background threads for this controller. + synchronized (running) { + controllerId = nextControllerId++; + } + stdoutCapture = new OutputCapture(ProcessStream.Stdout, controllerId); + stderrCapture = new OutputCapture(ProcessStream.Stderr, controllerId); + stdoutCapture.start(); + stderrCapture.start(); + } + + /** + * Returns a thread local ProcessController. + * Should NOT be closed when finished so it can be reused by the thread. + * + * @return a thread local ProcessController. 
+ */ + public static ProcessController getThreadLocal() { + // If the local controller was destroyed get a fresh instance. + if (threadProcessController.get().destroyed) + threadProcessController.remove(); + return threadProcessController.get(); + } + + /** + * Thread local process controller container. + */ + private static final ThreadLocal threadProcessController = + new ThreadLocal() { + @Override + protected ProcessController initialValue() { + return new ProcessController(); + } + }; + + /** + * Similar to Runtime.exec() but drains the output and error streams. + * + * @param command Command to run. + * @return The result code. + */ + public static int exec(String[] command) { + ProcessController controller = ProcessController.getThreadLocal(); + return controller.exec(new ProcessSettings(command)).getExitValue(); + } + + /** + * Executes a command line program with the settings and waits for it to return, + * processing the output on a background thread. + * + * @param settings Settings to be run. + * @return The output of the command. + */ + public ProcessOutput exec(ProcessSettings settings) { + if (destroyed) + throw new IllegalStateException("This controller was destroyed"); + + ProcessBuilder builder = new ProcessBuilder(settings.getCommand()); + builder.directory(settings.getDirectory()); + + Map settingsEnvironment = settings.getEnvironment(); + if (settingsEnvironment != null) { + Map builderEnvironment = builder.environment(); + builderEnvironment.clear(); + builderEnvironment.putAll(settingsEnvironment); + } + + builder.redirectErrorStream(settings.isRedirectErrorStream()); + + StreamOutput stdout = null; + StreamOutput stderr = null; + + // Start the process running. 
+ + try { + synchronized (toCapture) { + process = builder.start(); + } + running.add(this); + } catch (IOException e) { + throw new ReviewedStingException("Unable to start command: " + StringUtils.join(builder.command(), " ")); + } + + int exitCode; + + try { + // Notify the background threads to start capturing. + synchronized (toCapture) { + toCapture.put(ProcessStream.Stdout, + new CapturedStreamOutput(settings.getStdoutSettings(), process.getInputStream(), System.out)); + toCapture.put(ProcessStream.Stderr, + new CapturedStreamOutput(settings.getStderrSettings(), process.getErrorStream(), System.err)); + toCapture.notifyAll(); + } + + // Write stdin content + InputStreamSettings stdinSettings = settings.getStdinSettings(); + Set streamLocations = stdinSettings.getStreamLocations(); + if (!streamLocations.isEmpty()) { + try { + OutputStream stdinStream = process.getOutputStream(); + for (StreamLocation location : streamLocations) { + InputStream inputStream; + switch (location) { + case Buffer: + inputStream = new ByteArrayInputStream(stdinSettings.getInputBuffer()); + break; + case File: + try { + inputStream = FileUtils.openInputStream(stdinSettings.getInputFile()); + } catch (IOException e) { + throw new UserException.BadInput(e.getMessage()); + } + break; + case Standard: + inputStream = System.in; + break; + default: + throw new ReviewedStingException("Unexpected stream location: " + location); + } + try { + IOUtils.copy(inputStream, stdinStream); + } finally { + if (location != StreamLocation.Standard) + IOUtils.closeQuietly(inputStream); + } + } + stdinStream.flush(); + } catch (IOException e) { + throw new ReviewedStingException("Error writing to stdin on command: " + StringUtils.join(builder.command(), " "), e); + } + } + + // Wait for the process to complete. 
+ try { + process.getOutputStream().close(); + process.waitFor(); + } catch (IOException e) { + throw new ReviewedStingException("Unable to close stdin on command: " + StringUtils.join(builder.command(), " "), e); + } catch (InterruptedException e) { + throw new ReviewedStingException("Process interrupted", e); + } finally { + while (!destroyed && stdout == null || stderr == null) { + synchronized (fromCapture) { + if (fromCapture.containsKey(ProcessStream.Stdout)) + stdout = fromCapture.remove(ProcessStream.Stdout); + if (fromCapture.containsKey(ProcessStream.Stderr)) + stderr = fromCapture.remove(ProcessStream.Stderr); + try { + if (stdout == null || stderr == null) + fromCapture.wait(); + } catch (InterruptedException e) { + // Log the error, ignore the interrupt and wait patiently + // for the OutputCaptures to (via finally) return their + // stdout and stderr. + logger.error(e); + } + } + } + + if (destroyed) { + if (stdout == null) + stdout = StreamOutput.EMPTY; + if (stderr == null) + stderr = StreamOutput.EMPTY; + } + } + } finally { + synchronized (toCapture) { + exitCode = process.exitValue(); + process = null; + } + running.remove(this); + } + + return new ProcessOutput(exitCode, stdout, stderr); + } + + /** + * @return The set of still running processes. + */ + public static Set getRunning() { + synchronized (running) { + return new HashSet(running); + } + } + + /** + * Stops the process from running and tries to ensure process is cleaned up properly. + * NOTE: sub-processes started by process may be zombied with their parents set to pid 1. + * NOTE: capture threads may block on read. + * TODO: Try to use NIO to interrupt streams. 
+ */ + public void tryDestroy() { + destroyed = true; + synchronized (toCapture) { + if (process != null) { + process.destroy(); + IOUtils.closeQuietly(process.getInputStream()); + IOUtils.closeQuietly(process.getErrorStream()); + } + stdoutCapture.interrupt(); + stderrCapture.interrupt(); + toCapture.notifyAll(); + } + } + + @Override + protected void finalize() throws Throwable { + try { + tryDestroy(); + } catch (Exception e) { + logger.error(e); + } + super.finalize(); + } + + private class OutputCapture extends Thread { + private final int controllerId; + private final ProcessStream key; + + /** + * Reads in the output of a stream on a background thread to keep the output pipe from backing up and freezing the called process. + * + * @param key The stdout or stderr key for this output capture. + * @param controllerId Unique id of the controller. + */ + public OutputCapture(ProcessStream key, int controllerId) { + super(String.format("OutputCapture-%d-%s-%s-%d", controllerId, key.name().toLowerCase(), + Thread.currentThread().getName(), Thread.currentThread().getId())); + this.controllerId = controllerId; + this.key = key; + setDaemon(true); + } + + /** + * Runs the capture. + */ + @Override + public void run() { + while (!destroyed) { + StreamOutput processStream = StreamOutput.EMPTY; + try { + // Wait for a new input stream to be passed from this process controller. 
+ CapturedStreamOutput capturedProcessStream = null; + while (!destroyed && capturedProcessStream == null) { + synchronized (toCapture) { + if (toCapture.containsKey(key)) { + capturedProcessStream = toCapture.remove(key); + } else { + toCapture.wait(); + } + } + } + + if (!destroyed) { + // Read in the input stream + processStream = capturedProcessStream; + capturedProcessStream.readAndClose(); + } + } catch (InterruptedException e) { + logger.info("OutputCapture interrupted, exiting"); + break; + } catch (IOException e) { + logger.error("Error reading process output", e); + } finally { + // Send the string back to the process controller. + synchronized (fromCapture) { + fromCapture.put(key, processStream); + fromCapture.notify(); + } + } + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessOutput.java b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessOutput.java new file mode 100755 index 000000000..211008950 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessOutput.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +public class ProcessOutput { + private final int exitValue; + private final StreamOutput stdout; + private final StreamOutput stderr; + + /** + * The output of a process. + * + * @param exitValue The exit value. + * @param stdout The capture of stdout as defined by the stdout OutputStreamSettings. + * @param stderr The capture of stderr as defined by the stderr OutputStreamSettings. + */ + public ProcessOutput(int exitValue, StreamOutput stdout, StreamOutput stderr) { + this.exitValue = exitValue; + this.stdout = stdout; + this.stderr = stderr; + } + + public int getExitValue() { + return exitValue; + } + + public StreamOutput getStdout() { + return stdout; + } + + public StreamOutput getStderr() { + return stderr; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java new file mode 100755 index 000000000..b9f67f3a4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in 
all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import com.sun.corba.se.spi.orbutil.fsm.Input; + +import java.io.File; +import java.util.Map; + +public class ProcessSettings { + private String[] command; + private Map environment; + private File directory; + private boolean redirectErrorStream; + private InputStreamSettings stdinSettings; + private OutputStreamSettings stdoutSettings; + private OutputStreamSettings stderrSettings; + + /** + * @param command Command line to run. + */ + public ProcessSettings(String[] command) { + this(command, false, null, null, null, null, null); + } + + /** + * @param command Command line to run. + * @param redirectErrorStream true if stderr should be sent to stdout. + * @param environment Environment settings to override System.getEnv, or null to use System.getEnv. + * @param directory The directory to run the command in, or null to run in the current directory. + * @param stdinSettings Settings for writing to the process stdin. + * @param stdoutSettings Settings for capturing the process stdout. + * @param stderrSettings Setting for capturing the process stderr. 
+ */ + public ProcessSettings(String[] command, boolean redirectErrorStream, File directory, Map environment, + InputStreamSettings stdinSettings, OutputStreamSettings stdoutSettings, OutputStreamSettings stderrSettings) { + this.command = checkCommand(command); + this.redirectErrorStream = redirectErrorStream; + this.directory = directory; + this.environment = environment; + this.stdinSettings = checkSettings(stdinSettings); + this.stdoutSettings = checkSettings(stdoutSettings); + this.stderrSettings = checkSettings(stderrSettings); + } + + public String[] getCommand() { + return command; + } + + public void setCommand(String[] command) { + this.command = checkCommand(command); + } + + public boolean isRedirectErrorStream() { + return redirectErrorStream; + } + + public void setRedirectErrorStream(boolean redirectErrorStream) { + this.redirectErrorStream = redirectErrorStream; + } + + public File getDirectory() { + return directory; + } + + public void setDirectory(File directory) { + this.directory = directory; + } + + public Map getEnvironment() { + return environment; + } + + public void setEnvironment(Map environment) { + this.environment = environment; + } + + public InputStreamSettings getStdinSettings() { + return stdinSettings; + } + + public void setStdinSettings(InputStreamSettings stdinSettings) { + this.stdinSettings = checkSettings(stdinSettings); + } + + public OutputStreamSettings getStdoutSettings() { + return stdoutSettings; + } + + public void setStdoutSettings(OutputStreamSettings stdoutSettings) { + this.stdoutSettings = checkSettings(stdoutSettings); + } + + public OutputStreamSettings getStderrSettings() { + return stderrSettings; + } + + public void setStderrSettings(OutputStreamSettings stderrSettings) { + this.stderrSettings = checkSettings(stderrSettings); + } + + protected String[] checkCommand(String[] command) { + if (command == null) + throw new IllegalArgumentException("Command is not allowed to be null"); + for (String s: command) + 
if (s == null) + throw new IllegalArgumentException("Command is not allowed to contain nulls"); + return command; + } + + protected InputStreamSettings checkSettings(InputStreamSettings settings) { + return settings == null ? new InputStreamSettings() : settings; + } + + protected OutputStreamSettings checkSettings(OutputStreamSettings settings) { + return settings == null ? new OutputStreamSettings() : settings; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/StreamLocation.java b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamLocation.java new file mode 100755 index 000000000..df72180f1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamLocation.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.runtime; + +/** + * Where to read/write a stream + */ +public enum StreamLocation { + Buffer, File, Standard +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/StreamOutput.java b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamOutput.java new file mode 100755 index 000000000..5dc94815f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamOutput.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +/** + * The content of stdout or stderr. + */ +public abstract class StreamOutput { + /** + * Empty stream output when no output is captured due to an error. 
+ */ + public static final StreamOutput EMPTY = new StreamOutput() { + @Override + public byte[] getBufferBytes() { + return new byte[0]; + } + + @Override + public boolean isBufferTruncated() { + return false; + } + }; + + /** + * Returns the content as a string. + * + * @return The content as a string. + */ + public String getBufferString() { + return new String(getBufferBytes()); + } + + /** + * Returns the content as bytes. + * + * @return The content as a byte array. + */ + public abstract byte[] getBufferBytes(); + + /** + * Returns true if the buffer was truncated. + * + * @return true if the buffer was truncated. + */ + public abstract boolean isBufferTruncated(); +} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 2dcdd5ce6..1b3641128 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -2,11 +2,15 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.io.File; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * @author aaron @@ -29,7 +33,7 @@ public class ArtificialSAMUtils { File outFile = new File(filename); SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, true, outFile); - + for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) { for (int readNumber = 1; readNumber <
readsPerChomosome; readNumber++) { out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, DEFAULT_READ_LENGTH)); @@ -134,6 +138,7 @@ public class ArtificialSAMUtils { /** * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * + * * @param header the SAM header to associate the read with * @param name the name of the read * @param refIndex the reference index, i.e. what chromosome to associate it with @@ -142,11 +147,11 @@ public class ArtificialSAMUtils { * * @return the artificial read */ - public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) { + public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) { if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) || - (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) + (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart); - SAMRecord record = new SAMRecord(header); + GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(name); record.setReferenceIndex(refIndex); record.setAlignmentStart(alignmentStart); @@ -166,6 +171,7 @@ public class ArtificialSAMUtils { if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { record.setReadUnmappedFlag(true); } + return record; } @@ -181,19 +187,51 @@ public class ArtificialSAMUtils { * * @return the artificial read */ - public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) { + public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, 
int refIndex, int alignmentStart, byte[] bases, byte[] qual ) { if (bases.length != qual.length) { throw new ReviewedStingException("Passed in read string is different length then the quality array"); } - SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); + GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); rec.setReadBases(bases); rec.setBaseQualities(qual); if (refIndex == -1) { rec.setReadUnmappedFlag(true); } + return rec; } + public final static List createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { + SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen); + SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen); + + left.setReadPairedFlag(true); + right.setReadPairedFlag(true); + + left.setProperPairFlag(true); + right.setProperPairFlag(true); + + left.setFirstOfPairFlag(leftIsFirst); + right.setFirstOfPairFlag(! 
leftIsFirst); + + left.setReadNegativeStrandFlag(leftIsNegative); + left.setMateNegativeStrandFlag(!leftIsNegative); + right.setReadNegativeStrandFlag(!leftIsNegative); + right.setMateNegativeStrandFlag(leftIsNegative); + + left.setMateAlignmentStart(right.getAlignmentStart()); + right.setMateAlignmentStart(left.getAlignmentStart()); + + left.setMateReferenceIndex(0); + right.setMateReferenceIndex(0); + + int isize = rightStart + readLen - leftStart; + left.setInferredInsertSize(isize); + right.setInferredInsertSize(-isize); + + return Arrays.asList(left, right); + } + /** * create an iterator containing the specified read piles * @@ -255,4 +293,52 @@ public class ArtificialSAMUtils { return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); } + + private final static int ranIntInclusive(Random ran, int start, int stop) { + final int range = stop - start; + return ran.nextInt(range) + start; + } + + /** + * Creates a read backed pileup containing up to pileupSize reads at refID 0 from header at loc with + * reads created that have readLen bases. Pairs are sampled from a gaussian distribution with mean insert + * size of insertSize and variation of insertSize / 10. The first read will be in the pileup, and the second + * may be, depending on where this sampled insertSize puts it. 
+ * @param header + * @param loc + * @param readLen + * @param insertSize + * @param pileupSize + * @return + */ + public static ReadBackedPileup createReadBackedPileup(final SAMFileHeader header, final GenomeLoc loc, final int readLen, final int insertSize, final int pileupSize) { + final Random ran = new Random(); + final boolean leftIsFirst = true; + final boolean leftIsNegative = false; + final int insertSizeVariation = insertSize / 10; + final int pos = loc.getStart(); + + final List pileupElements = new ArrayList(); + for ( int i = 0; i < pileupSize / 2; i++ ) { + final String readName = "read" + i; + final int leftStart = ranIntInclusive(ran, 1, pos); + final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize); + final int rightStart = leftStart + fragmentSize - readLen; + + if ( rightStart <= 0 ) continue; + + List pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + final SAMRecord left = pair.get(0); + final SAMRecord right = pair.get(1); + + pileupElements.add(new PileupElement(left, pos - leftStart)); + + if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) { + pileupElements.add(new PileupElement(right, pos - rightStart)); + } + } + + Collections.sort(pileupElements); + return new ReadBackedPileupImpl(loc, pileupElements); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index c7ffcab0c..ff7d12f09 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.utils.NGSPlatform; /** * @author ebanks @@ -15,16 +16,28 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { // 
the SAMReadGroupRecord data we're caching private String mSample = null; private String mPlatform = null; + private NGSPlatform mNGSPlatform = null; // because some values can be null, we don't want to duplicate effort private boolean retrievedSample = false; private boolean retrievedPlatform = false; + private boolean retrievedNGSPlatform = false; + public GATKSAMReadGroupRecord(final String id) { + super(id); + } public GATKSAMReadGroupRecord(SAMReadGroupRecord record) { super(record.getReadGroupId(), record); } + public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) { + super(record.getReadGroupId(), record); + setPlatform(pl.getDefaultPlatform()); + mNGSPlatform = pl; + retrievedPlatform = retrievedNGSPlatform = true; + } + /////////////////////////////////////////////////////////////////////////////// // *** The following methods are overloaded to cache the appropriate data ***// /////////////////////////////////////////////////////////////////////////////// @@ -55,5 +68,15 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { super.setPlatform(s); mPlatform = s; retrievedPlatform = true; + retrievedNGSPlatform = false; // recalculate the NGSPlatform + } + + public NGSPlatform getNGSPlatform() { + if ( ! 
retrievedNGSPlatform ) { + mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform()); + retrievedNGSPlatform = true; + } + + return mNGSPlatform; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index e7c235cf7..d6c0b68b8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -1,49 +1,56 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.NGSPlatform; -import java.lang.reflect.Method; import java.util.HashMap; -import java.util.List; import java.util.Map; /** - * @author ebanks + * @author ebanks, depristo * GATKSAMRecord * - * this class extends the samtools SAMRecord class and caches important + * this class extends the samtools BAMRecord class (and SAMRecord) and caches important * (and oft-accessed) data that's not already cached by the SAMRecord class * * IMPORTANT NOTE: Because ReadGroups are not set through the SAMRecord, * if they are ever modified externally then one must also invoke the * setReadGroup() method here to ensure that the cache is kept up-to-date. * - * 13 Oct 2010 - mhanna - this class is fundamentally flawed: it uses a decorator - * pattern to wrap a heavyweight object, which can lead - * to heinous side effects if the wrapping is not carefully - * done. Hopefully SAMRecord will become an interface and - * this will eventually be fixed. */ -public class GATKSAMRecord extends SAMRecord { - - // the underlying SAMRecord which we are wrapping - private final SAMRecord mRecord; - +public class GATKSAMRecord extends BAMRecord { // the SAMRecord data we're caching private String mReadString = null; - private SAMReadGroupRecord mReadGroup = null; - private boolean mNegativeStrandFlag; - private boolean mUnmappedFlag; - private Boolean mSecondOfPairFlag = null; + private GATKSAMReadGroupRecord mReadGroup = null; + private byte[] reducedReadCounts = null; // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; - - /** A private cache for the reduced read quality. 
Null indicates the value hasn't be fetched yet or isn't available */ - private boolean lookedUpReducedReadQuality = false; - private Integer reducedReadQuality; + private boolean retrievedReduceReadCounts = false; // These temporary attributes were added here to make life easier for // certain algorithms by providing a way to label or attach arbitrary data to @@ -51,101 +58,112 @@ public class GATKSAMRecord extends SAMRecord { // These attributes exist in memory only, and are never written to disk. private Map temporaryAttributes; - public GATKSAMRecord(SAMRecord record, boolean useOriginalBaseQualities, byte defaultBaseQualities) { - super(null); // it doesn't matter - this isn't used - if ( record == null ) - throw new IllegalArgumentException("The SAMRecord argument cannot be null"); - mRecord = record; + /** + * HACK TO CREATE GATKSAMRECORD WITH ONLY A HEADER FOR TESTING PURPOSES ONLY + * @param header + */ + public GATKSAMRecord(final SAMFileHeader header) { + this(new SAMRecord(header)); + } - mNegativeStrandFlag = mRecord.getReadNegativeStrandFlag(); - mUnmappedFlag = mRecord.getReadUnmappedFlag(); + /** + * HACK TO CREATE GATKSAMRECORD BASED ONLY ON A SAMRECORD FOR TESTING PURPOSES ONLY + * @param read + */ + public GATKSAMRecord(final SAMRecord read) { + super(read.getHeader(), read.getMateReferenceIndex(), + read.getAlignmentStart(), + read.getReadName() != null ? 
(short)read.getReadNameLength() : 0, + (short)read.getMappingQuality(), + 0, + read.getCigarLength(), + read.getFlags(), + read.getReadLength(), + read.getMateReferenceIndex(), + read.getMateAlignmentStart(), + read.getInferredInsertSize(), + new byte[]{}); + super.clearAttributes(); + } - // because attribute methods are declared to be final (and we can't overload them), - // we need to actually set all of the attributes here - List attributes = record.getAttributes(); - for ( SAMTagAndValue attribute : attributes ) - setAttribute(attribute.tag, attribute.value); - - // if we are using default quals, check if we need them, and add if necessary. - // 1. we need if reads are lacking or have incomplete quality scores - // 2. we add if defaultBaseQualities has a positive value - if (defaultBaseQualities >= 0) { - byte reads [] = record.getReadBases(); - byte quals [] = record.getBaseQualities(); - if (quals == null || quals.length < reads.length) { - byte new_quals [] = new byte [reads.length]; - for (int i=0; i getAttributes() { return mRecord.getAttributes(); } - - public SAMFileHeader getHeader() { return mRecord.getHeader(); } - - public void setHeader(SAMFileHeader samFileHeader) { mRecord.setHeader(samFileHeader); } - - public byte[] getVariableBinaryRepresentation() { return mRecord.getVariableBinaryRepresentation(); } - - public int getAttributesBinarySize() { return mRecord.getAttributesBinarySize(); } - - public String format() { return mRecord.format(); } - - public List getAlignmentBlocks() { return mRecord.getAlignmentBlocks(); } - - public List validateCigar(long l) { return mRecord.validateCigar(l); } - @Override public boolean equals(Object o) { if (this == o) return true; - // note -- this forbids a GATKSAMRecord being equal to its underlying SAMRecord if (!(o instanceof GATKSAMRecord)) return false; // note that we do not consider the GATKSAMRecord internal state at all - return mRecord.equals(((GATKSAMRecord)o).mRecord); - } - - public int 
hashCode() { return mRecord.hashCode(); } - - public List isValid() { return mRecord.isValid(); } - - public Object clone() throws CloneNotSupportedException { return mRecord.clone(); } - - public String toString() { return mRecord.toString(); } - - public SAMFileSource getFileSource() { return mRecord.getFileSource(); } - - /** - * Sets a marker providing the source reader for this file and the position in the file from which the read originated. - * @param fileSource source of the given file. - */ - @Override - protected void setFileSource(final SAMFileSource fileSource) { - try { - Method method = SAMRecord.class.getDeclaredMethod("setFileSource",SAMFileSource.class); - method.setAccessible(true); - method.invoke(mRecord,fileSource); - } - catch(Exception ex) { - throw new ReviewedStingException("Unable to invoke setFileSource method",ex); - } + return super.equals(o); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java new file mode 100644 index 000000000..d96c874ea --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordFactory; +import net.sf.samtools.BAMRecord; +import org.broadinstitute.sting.utils.exceptions.UserException; + +/** + * Factory interface implementation used to create GATKSamRecords + * from SAMFileReaders with SAM-JDK + * + * @author Mark DePristo + */ +public class GATKSamRecordFactory implements SAMRecordFactory { + + /** Create a new SAMRecord to be filled in */ + public SAMRecord createSAMRecord(SAMFileHeader header) { + throw new UserException.BadInput("The GATK now longer supports input SAM files"); + } + + /** Create a new BAM Record. 
*/ + public BAMRecord createBAMRecord(final SAMFileHeader header, + final int referenceSequenceIndex, + final int alignmentStart, + final short readNameLength, + final short mappingQuality, + final int indexingBin, + final int cigarLen, + final int flags, + final int readLen, + final int mateReferenceSequenceIndex, + final int mateAlignmentStart, + final int insertSize, + final byte[] variableLengthBlock) { + return new GATKSAMRecord(header, + referenceSequenceIndex, + alignmentStart, + readNameLength, + mappingQuality, + indexingBin, + cigarLen, + flags, + readLen, + mateReferenceSequenceIndex, + mateAlignmentStart, + insertSize, + variableLengthBlock); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index d1e9a236f..f8e4927ed 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -52,38 +52,6 @@ public class ReadUtils { // ---------------------------------------------------------------------------------------------------- public static final String REDUCED_READ_QUALITY_TAG = "RQ"; - public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC"; - - public final static byte[] getReducedReadQualityTagValue(final SAMRecord read) { - return read.getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); - } - - public final static boolean isReducedRead(final SAMRecord read) { - return getReducedReadQualityTagValue(read) != null; - } - - public final static byte getReducedQual(final SAMRecord read, final int i) { - return read.getBaseQualities()[i]; - } - - public final static byte getReducedCount(final SAMRecord read, final int i) { - return getReducedReadQualityTagValue(read)[i]; - } - - public final static SAMRecord reducedReadWithReducedQuals(final SAMRecord read) { - if ( ! 
isReducedRead(read) ) throw new IllegalArgumentException("read must be a reduced read"); - return read; -// try { -// SAMRecord newRead = (SAMRecord)read.clone(); -// byte reducedQual = (byte)(int)getReducedReadQualityTagValue(read); -// byte[] newQuals = new byte[read.getBaseQualities().length]; -// Arrays.fill(newQuals, reducedQual); -// newRead.setBaseQualities(newQuals); -// return newRead; -// } catch ( CloneNotSupportedException e ) { -// throw new ReviewedStingException("SAMRecord no longer supports clone", e); -// } - } // ---------------------------------------------------------------------------------------------------- // diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 8e11add33..f99a105ae 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -50,6 +50,7 @@ public abstract class BaseTest { public static final String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta"; public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta"; + //public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta"; public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta"; public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/"; public static final String validationDataLocation = GATKDataLocation + "Validation_Data/"; @@ -99,10 +100,10 @@ public abstract class BaseTest { logger.setLevel(Level.WARN); // find our file sources - if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) { - logger.fatal("We can't locate the reference directories. 
Aborting!"); - throw new RuntimeException("BaseTest setup failed: unable to locate the reference directories"); - } +// if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) { +// logger.fatal("We can't locate the reference directories. Aborting!"); +// throw new RuntimeException("BaseTest setup failed: unable to locate the reference directories"); +// } } /** diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java new file mode 100644 index 000000000..99d6b88f3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.commandline; + +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class ArgumentMatchSiteUnitTest { + @Test + public void testCommandLine() { + ArgumentMatchSite site = new ArgumentMatchSite(ArgumentMatchSource.COMMAND_LINE, 1); + Assert.assertEquals(site.getSource(), ArgumentMatchSource.COMMAND_LINE); + Assert.assertEquals(site.getIndex(), 1); + } + + @Test + public void testFile() { + ArgumentMatchSource source = new ArgumentMatchSource(new File("test")); + ArgumentMatchSite site = new ArgumentMatchSite(source, 1); + Assert.assertEquals(site.getSource(), source); + Assert.assertEquals(site.getIndex(), 1); + } + + @Test + public void testEquals() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSite site1 = new ArgumentMatchSite(cmdLine, 1); + ArgumentMatchSite site2 = new ArgumentMatchSite(cmdLine, 2); + + Assert.assertFalse(site1.equals(null)); + + Assert.assertTrue(site1.equals(site1)); + Assert.assertFalse(site1.equals(site2)); + + Assert.assertFalse(site2.equals(site1)); + Assert.assertTrue(site2.equals(site2)); + } + + @Test + public void testCompareTo() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSite site1 = new ArgumentMatchSite(cmdLine, 1); + ArgumentMatchSite site2 = new ArgumentMatchSite(cmdLine, 2); + + Assert.assertTrue(site1.compareTo(site1) == 0); + Assert.assertTrue(site1.compareTo(site2) < 0); + Assert.assertTrue(site2.compareTo(site1) > 0); + Assert.assertTrue(site2.compareTo(site2) == 0); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testCompareToNull() { + new ArgumentMatchSite(ArgumentMatchSource.COMMAND_LINE, 0).compareTo(null); + } +} diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java new file mode 100644 index 
000000000..4bc7eb822 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.commandline; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class ArgumentMatchSourceUnitTest extends BaseTest { + @Test + public void testCommandLine() { + ArgumentMatchSource source = ArgumentMatchSource.COMMAND_LINE; + Assert.assertEquals(source.getType(), ArgumentMatchSourceType.CommandLine); + Assert.assertNull(source.getFile()); + } + + @Test + public void testFile() { + File f = new File("test"); + ArgumentMatchSource source = new ArgumentMatchSource(f); + Assert.assertEquals(source.getType(), ArgumentMatchSourceType.File); + Assert.assertEquals(source.getFile(), f); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNullFile() { + new ArgumentMatchSource(null); + } + + @Test + public void testEquals() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + + Assert.assertFalse(cmdLine.equals(null)); + + Assert.assertTrue(cmdLine.equals(cmdLine)); + Assert.assertFalse(cmdLine.equals(fileA)); + Assert.assertFalse(cmdLine.equals(fileB)); + + Assert.assertFalse(fileA.equals(cmdLine)); + Assert.assertTrue(fileA.equals(fileA)); + Assert.assertFalse(fileA.equals(fileB)); + + Assert.assertFalse(fileB.equals(cmdLine)); + Assert.assertFalse(fileB.equals(fileA)); + Assert.assertTrue(fileB.equals(fileB)); + } + + @Test + public void testCompareTo() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + + Assert.assertTrue(cmdLine.compareTo(cmdLine) == 0); + Assert.assertTrue(cmdLine.compareTo(fileA) < 0); + Assert.assertTrue(cmdLine.compareTo(fileB) < 0); + + 
Assert.assertTrue(fileA.compareTo(cmdLine) > 0); + Assert.assertTrue(fileA.compareTo(fileA) == 0); + Assert.assertTrue(fileA.compareTo(fileB) < 0); + + Assert.assertTrue(fileB.compareTo(cmdLine) > 0); + Assert.assertTrue(fileB.compareTo(fileA) > 0); + Assert.assertTrue(fileB.compareTo(fileB) == 0); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testCompareToNull() { + ArgumentMatchSource.COMMAND_LINE.compareTo(null); + } +} diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index f04731214..87f0e6ff0 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.commandline; +import org.apache.commons.io.FileUtils; import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -34,6 +35,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.io.File; +import java.io.IOException; import java.util.List; import java.util.EnumSet; /** @@ -493,6 +496,7 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertNotNull(definition, "Invalid default argument name assigned"); } + @SuppressWarnings("unused") private class CamelCaseArgProvider { @Argument(doc="my arg") Integer myArg; @@ -507,6 +511,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.validate(); } + @SuppressWarnings("unused") private class BooleanArgProvider { @Argument(doc="my bool") boolean myBool; @@ -561,6 +566,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.validate(); } + @SuppressWarnings("unused") private 
class MutuallyExclusiveArgProvider { @Argument(doc="foo",exclusiveOf="bar") Integer foo; @@ -618,6 +624,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.addArgumentSource( MultipleArgumentCollectionProvider.class ); } + @SuppressWarnings("unused") private class MultipleArgumentCollectionProvider { @ArgumentCollection RequiredArgProvider rap1 = new RequiredArgProvider(); @@ -937,4 +944,23 @@ public class ParsingEngineUnitTest extends BaseTest { VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); parsingEngine.loadArgumentsIntoObject( argProvider ); } + + @Test + public void argumentListTest() throws IOException { + File argsFile = BaseTest.createTempFile("args.", ".list"); + try { + FileUtils.write(argsFile, "-I na12878.bam"); + final String[] commandLine = new String[] {"-args", argsFile.getPath()}; + parsingEngine.addArgumentSource(InputFileArgProvider.class); + parsingEngine.parse(commandLine); + parsingEngine.validate(); + + InputFileArgProvider argProvider = new InputFileArgProvider(); + parsingEngine.loadArgumentsIntoObject(argProvider); + + Assert.assertEquals(argProvider.inputFile, "na12878.bam", "Argument is not correctly initialized"); + } finally { + FileUtils.deleteQuietly(argsFile); + } + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java index 702ba9f4f..c7eb4d88b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java @@ -18,7 +18,7 @@ public class BAQIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testPrintReadsNoBAQ() { - WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, 
Arrays.asList("902197bf77ed5a828d50e08771685928")); + WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, Arrays.asList("d97340a2bba2c6320d1ebeb86024a27c")); executeTest(String.format("testPrintReadsNoBAQ"), spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 6b6346447..7d6cfc7ad 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -224,7 +224,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("0bece77ce6bc447438ef9b2921b2dc41")); + Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -252,7 +252,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("790b1a1d6ab79eee8c24812bb8ca6fae")); + Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -262,7 +262,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("408d3aba4d094c067fc00a43992c2292")); + Arrays.asList("118918f2e9e56a3cfc5ccb2856d529c8")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); } @@ -272,7 +272,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { 
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("5e4e09354410b76fc0d822050d84132a")); + Arrays.asList("a20799237accd52c1b8c2ac096309c8f")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } @@ -282,7 +282,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("c599eedbeb422713b8a28529e805e4ae")); + Arrays.asList("18ef8181157b4ac3eb8492f538467f92")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("37d908a682ac269f8f19dec939ff5b01")); + Arrays.asList("ad884e511a751b05e64db5314314365a")); executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); } diff --git a/public/java/test/org/broadinstitute/sting/utils/R/RScriptLibraryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/R/RScriptLibraryUnitTest.java new file mode 100644 index 000000000..19fd5b316 --- /dev/null +++ 
b/public/java/test/org/broadinstitute/sting/utils/R/RScriptLibraryUnitTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.R; + +import org.apache.commons.io.FileUtils; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class RScriptLibraryUnitTest { + @Test + public void testProperties() { + Assert.assertEquals(RScriptLibrary.GSALIB.getLibraryName(), "gsalib"); + Assert.assertEquals(RScriptLibrary.GSALIB.getResourcePath(), "gsalib.tar.gz"); + } + + @Test + public void testWriteTemp() { + File file = RScriptLibrary.GSALIB.writeTemp(); + Assert.assertTrue(file.exists(), "R library was not written to temp file: " + file); + FileUtils.deleteQuietly(file); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java index cc0007439..59a6ecb8d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java @@ -5,6 +5,7 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeTest; @@ -12,7 +13,7 @@ import org.testng.annotations.Test; public class ReadUtilsUnitTest extends BaseTest { - SAMRecord read, reducedRead; + GATKSAMRecord read, reducedRead; final static String BASES = "ACTG"; final static String QUALS = "!+5?"; final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40}; @@ -47,13 +48,12 @@ public class ReadUtilsUnitTest extends BaseTest { @Test public void testReducedReads() { - Assert.assertFalse(ReadUtils.isReducedRead(read), "isReducedRead is false for normal read"); - Assert.assertEquals(ReadUtils.getReducedReadQualityTagValue(read), null, "No reduced read tag in 
normal read"); + Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); + Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); - Assert.assertTrue(ReadUtils.isReducedRead(reducedRead), "isReducedRead is true for reduced read"); + Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); for ( int i = 0; i < reducedRead.getReadLength(); i++) { - Assert.assertEquals(ReadUtils.getReducedQual(reducedRead, i), read.getBaseQualities()[i], "Reduced read quality not set to the expected value at " + i); - Assert.assertEquals(ReadUtils.getReducedCount(reducedRead, i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); + Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java index 76dd5d341..0f19e2f90 100644 --- a/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.utils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.Test; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; @@ -28,7 +29,7 @@ public class ReservoirDownsamplerUnitTest { @Test public void testOneElementWithPoolSizeOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); + List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); ReservoirDownsampler downsampler = new ReservoirDownsampler(1); downsampler.addAll(reads); @@ -40,7 +41,7 @@ public class ReservoirDownsamplerUnitTest 
{ @Test public void testOneElementWithPoolSizeGreaterThanOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); + List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); ReservoirDownsampler downsampler = new ReservoirDownsampler(5); downsampler.addAll(reads); diff --git a/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java new file mode 100644 index 000000000..4caf7f485 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java @@ -0,0 +1,197 @@ +package org.broadinstitute.sting.utils.io; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.BaseTest; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class IOUtilsUnitTest extends BaseTest { + @Test + public void testGoodTempDir() { + IOUtils.checkTempDir(new File("/tmp/queue")); + } + + @Test(expectedExceptions=UserException.BadTmpDir.class) + public void testBadTempDir() { + IOUtils.checkTempDir(new File("/tmp")); + } + + @Test + public void testAbsoluteSubDir() { + File subDir = IOUtils.absolute(new File("."), new File("/path/to/file")); + Assert.assertEquals(subDir, new File("/path/to/file")); + + subDir = IOUtils.absolute(new File("/different/path"), new File("/path/to/file")); + Assert.assertEquals(subDir, new File("/path/to/file")); + + subDir = IOUtils.absolute(new File("/different/path"), new File(".")); + Assert.assertEquals(subDir, new File("/different/path")); + } + + @Test + public void testRelativeSubDir() throws IOException { + File subDir = IOUtils.absolute(new File("."), new File("path/to/file")); + Assert.assertEquals(subDir.getCanonicalFile(), new 
File("path/to/file").getCanonicalFile()); + + subDir = IOUtils.absolute(new File("/different/path"), new File("path/to/file")); + Assert.assertEquals(subDir, new File("/different/path/path/to/file")); + } + + @Test + public void testDottedSubDir() throws IOException { + File subDir = IOUtils.absolute(new File("."), new File("path/../to/file")); + Assert.assertEquals(subDir.getCanonicalFile(), new File("path/../to/./file").getCanonicalFile()); + + subDir = IOUtils.absolute(new File("."), new File("/path/../to/file")); + Assert.assertEquals(subDir, new File("/path/../to/file")); + + subDir = IOUtils.absolute(new File("/different/../path"), new File("path/to/file")); + Assert.assertEquals(subDir, new File("/different/../path/path/to/file")); + + subDir = IOUtils.absolute(new File("/different/./path"), new File("/path/../to/file")); + Assert.assertEquals(subDir, new File("/path/../to/file")); + } + + @Test + public void testTempDir() { + File tempDir = IOUtils.tempDir("Q-Unit-Test", "", new File("queueTempDirToDelete")); + Assert.assertTrue(tempDir.exists()); + Assert.assertFalse(tempDir.isFile()); + Assert.assertTrue(tempDir.isDirectory()); + boolean deleted = IOUtils.tryDelete(tempDir); + Assert.assertTrue(deleted); + Assert.assertFalse(tempDir.exists()); + } + + @Test + public void testDirLevel() { + File dir = IOUtils.dirLevel(new File("/path/to/directory"), 1); + Assert.assertEquals(dir, new File("/path")); + + dir = IOUtils.dirLevel(new File("/path/to/directory"), 2); + Assert.assertEquals(dir, new File("/path/to")); + + dir = IOUtils.dirLevel(new File("/path/to/directory"), 3); + Assert.assertEquals(dir, new File("/path/to/directory")); + + dir = IOUtils.dirLevel(new File("/path/to/directory"), 4); + Assert.assertEquals(dir, new File("/path/to/directory")); + } + + @Test + public void testAbsolute() { + File dir = IOUtils.absolute(new File("/path/./to/./directory/.")); + Assert.assertEquals(dir, new File("/path/to/directory")); + + dir = IOUtils.absolute(new 
File("/")); + Assert.assertEquals(dir, new File("/")); + + dir = IOUtils.absolute(new File("/.")); + Assert.assertEquals(dir, new File("/")); + + dir = IOUtils.absolute(new File("/././.")); + Assert.assertEquals(dir, new File("/")); + + dir = IOUtils.absolute(new File("/./directory/.")); + Assert.assertEquals(dir, new File("/directory")); + + dir = IOUtils.absolute(new File("/./directory/./")); + Assert.assertEquals(dir, new File("/directory")); + + dir = IOUtils.absolute(new File("/./directory./")); + Assert.assertEquals(dir, new File("/directory.")); + + dir = IOUtils.absolute(new File("/./.directory/")); + Assert.assertEquals(dir, new File("/.directory")); + } + + @Test + public void testTail() throws IOException { + List lines = Arrays.asList( + "chr18_random 4262 3154410390 50 51", + "chr19_random 301858 3154414752 50 51", + "chr21_random 1679693 3154722662 50 51", + "chr22_random 257318 3156435963 50 51", + "chrX_random 1719168 3156698441 50 51"); + List tail = IOUtils.tail(new File(BaseTest.hg18Reference + ".fai"), 5); + Assert.assertEquals(tail.size(), 5); + for (int i = 0; i < 5; i++) + Assert.assertEquals(tail.get(i), lines.get(i)); + } + + @Test + public void testWriteSystemFile() throws IOException { + File temp = createTempFile("temp.", ".properties"); + try { + IOUtils.writeResource(new Resource("StingText.properties", null), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testWriteSystemTempFile() throws IOException { + File temp = IOUtils.writeTempResource(new Resource("StingText.properties", null)); + try { + Assert.assertTrue(temp.getName().startsWith("StingText"), "File does not start with 'StingText.': " + temp); + Assert.assertTrue(temp.getName().endsWith(".properties"), "File does not end with '.properties': " + temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMissingSystemFile() throws IOException { + File temp = 
createTempFile("temp.", ".properties"); + try { + IOUtils.writeResource(new Resource("MissingStingText.properties", null), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testWriteRelativeFile() throws IOException { + File temp = createTempFile("temp.", ".properties"); + try { + IOUtils.writeResource(new Resource("/StingText.properties", IOUtils.class), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testWriteRelativeTempFile() throws IOException { + File temp = IOUtils.writeTempResource(new Resource("/StingText.properties", IOUtils.class)); + try { + Assert.assertTrue(temp.getName().startsWith("StingText"), "File does not start with 'StingText.': " + temp); + Assert.assertTrue(temp.getName().endsWith(".properties"), "File does not end with '.properties': " + temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMissingRelativeFile() throws IOException { + File temp = createTempFile("temp.", ".properties"); + try { + // Looking for /org/broadinstitute/sting/utils/file/StingText.properties + IOUtils.writeResource(new Resource("StingText.properties", IOUtils.class), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testResourceProperties() { + Resource resource = new Resource("foo", Resource.class); + Assert.assertEquals(resource.getPath(), "foo"); + Assert.assertEquals(resource.getRelativeClass(), Resource.class); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java new file mode 100644 index 000000000..8b797def4 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to 
any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.pileup; + +import com.google.caliper.Param; +import com.google.caliper.SimpleBenchmark; +import com.google.caliper.runner.CaliperMain; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; + +import java.util.*; + +/** + * Caliper microbenchmark of fragment pileup + */ +public class FragmentPileupBenchmark extends SimpleBenchmark { + List pileups; + + @Param({"0", "4", "30", "150", "1000"}) + int pileupSize; // set automatically by framework + + @Param({"200", "400"}) + int insertSize; // set automatically by framework + + @Override protected void setUp() { + final int nPileupsToGenerate = 100; + pileups = new ArrayList(nPileupsToGenerate); + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + GenomeLocParser genomeLocParser; + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 50); + final int readLen = 100; + + for ( int pileupN = 0; pileupN < nPileupsToGenerate; pileupN++ ) { + ReadBackedPileup rbp = ArtificialSAMUtils.createReadBackedPileup(header, loc, readLen, insertSize, pileupSize); + pileups.add(rbp); + } + } + + private void run(int rep, FragmentPileup.FragmentMatchingAlgorithm algorithm) { + int nFrags = 0; + for ( int i = 0; i < rep; i++ ) { + for ( ReadBackedPileup rbp : pileups ) + nFrags += new FragmentPileup(rbp, algorithm).getTwoReadPileup().size(); + } + } + + public void timeOriginal(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL); + } + + public void timeSkipNonOverlapping(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping); + } + + public static void main(String[] args) { + CaliperMain.main(FragmentPileupBenchmark.class, args); + } +} diff --git 
a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java new file mode 100644 index 000000000..c42c01c65 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.pileup; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Test routines for read-backed pileup. + */ +public class FragmentPileupUnitTest extends BaseTest { + private static SAMFileHeader header; + + private class FragmentPileupTest extends TestDataProvider { + List states = new ArrayList(); + + private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { + super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); + + List pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + SAMRecord left = pair.get(0); + SAMRecord right = pair.get(1); + + for ( int pos = leftStart; pos < rightStart + readLen; pos++) { + boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd(); + boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd(); + + if ( posCoveredByLeft || posCoveredByRight ) { + List reads = new ArrayList(); + List offsets = new ArrayList(); + + if ( posCoveredByLeft ) { + reads.add(left); + offsets.add(pos - left.getAlignmentStart()); + } + + if ( posCoveredByRight ) { + reads.add(right); + offsets.add(pos - right.getAlignmentStart()); + } + + boolean shouldBeFragment = posCoveredByLeft && posCoveredByRight; + ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets); + TestState testState = new TestState(shouldBeFragment, 
pileup); + states.add(testState); + } + } + } + } + + private class TestState { + boolean shouldBeFragment; + ReadBackedPileup pileup; + + private TestState(final boolean shouldBeFragment, final ReadBackedPileup pileup) { + this.shouldBeFragment = shouldBeFragment; + this.pileup = pileup; + } + } + + @DataProvider(name = "fragmentPileupTest") + public Object[][] createTests() { + for ( boolean leftIsFirst : Arrays.asList(true, false) ) { + for ( boolean leftIsNegative : Arrays.asList(true, false) ) { + // Overlapping pair + // ----> [first] + // <--- [second] + new FragmentPileupTest("overlapping-pair", 10, 1, 5, leftIsFirst, leftIsNegative); + + // Non-overlapping pair + // ----> + // <---- + new FragmentPileupTest("nonoverlapping-pair", 10, 1, 15, leftIsFirst, leftIsNegative); + } + } + + return FragmentPileupTest.getTests(FragmentPileupTest.class); + } + + @Test(enabled = true, dataProvider = "fragmentPileupTest") + public void testMe(FragmentPileupTest test) { + for ( TestState testState : test.states ) { + ReadBackedPileup rbp = testState.pileup; + FragmentPileup fp = new FragmentPileup(rbp); + Assert.assertEquals(fp.getTwoReadPileup().size(), testState.shouldBeFragment ? 1 : 0); + Assert.assertEquals(fp.getOneReadPileup().size(), testState.shouldBeFragment ? 
0 : 1); + } + } + + @BeforeTest + public void setup() { + header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/runtime/ProcessControllerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/runtime/ProcessControllerUnitTest.java new file mode 100644 index 000000000..7a31ceee0 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/runtime/ProcessControllerUnitTest.java @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+package org.broadinstitute.sting.utils.runtime;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.io.IOUtils;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Unit tests for {@link ProcessController}: runs small shell commands ({@code echo}, {@code cat},
+ * {@code pwd}, {@code sh}) and checks exit values, captured stdout/stderr buffers, output files,
+ * stdin sources, environment, working directory and buffer truncation.
+ *
+ * These tests shell out to standard Unix utilities and therefore expect them to be on the PATH.
+ */
+public class ProcessControllerUnitTest extends BaseTest {
+    /** Platform line separator, as produced by {@code String.format("%n")}. */
+    private static final String NL = String.format("%n");
+
+    /**
+     * Starts a long-sleeping shell job on a background thread using the thread-local controller,
+     * then calls {@code tryDestroy()} from the main thread after three seconds. Repeated three times.
+     *
+     * The background thread is deliberately not joined (see the comment at the end of the loop), so a
+     * failed assertion on that thread only prints a trace and does not fail this test.
+     *
+     * @throws InterruptedException if the main thread is interrupted while sleeping
+     */
+    @Test(timeOut = 60 * 1000)
+    public void testDestroyThreadLocal() throws InterruptedException {
+        for (int i = 0; i < 3; i++) {
+            final ProcessController controller = ProcessController.getThreadLocal();
+            final ProcessSettings job = new ProcessSettings(
+                    new String[] {"sh", "-c", "echo Hello World && sleep 600 && echo Goodbye"});
+            job.getStdoutSettings().setBufferSize(-1);
+
+            Thread t = new Thread(new Runnable() {
+                @Override
+                public void run() {
+                    System.out.println("BACK: Starting on background thread");
+                    ProcessOutput result = controller.exec(job);
+                    // Assert in background thread doesn't make it to main thread but does print a trace.
+                    Assert.assertTrue(result.getExitValue() != 0, "Destroy-attempted job returned zero exit status");
+                    System.out.println("BACK: Background thread exiting");
+                }
+            });
+
+            System.out.println("MAIN: Starting background thread");
+            t.start();
+            System.out.println("MAIN: Sleeping main thread 3s");
+            Thread.sleep(3000);
+            System.out.println("MAIN: Destroying job");
+            controller.tryDestroy();
+            System.out.println("MAIN: Not waiting on background thread to exit");
+            // Using standard java.io this was blocking on linux.
+            // TODO: try again with NIO.
+            //t.join();
+            //System.out.println("MAIN: Background thread exited");
+        }
+    }
+
+    /**
+     * Checks that a single controller instance stays usable across three rounds of:
+     * a command that cannot be started, a command that exits non-zero, and a command that succeeds.
+     */
+    @Test
+    public void testReuseAfterError() {
+        ProcessController controller = new ProcessController();
+
+        ProcessSettings job;
+
+        for (int i = 0; i < 3; i++) {
+            // Test bad command
+            job = new ProcessSettings(new String[] {"no_such_command"});
+            try {
+                controller.exec(job);
+                // Without this the step would pass silently if exec() returned normally.
+                // AssertionError is not a ReviewedStingException, so it is not swallowed below.
+                Assert.fail("Running 'no_such_command' did not throw a ReviewedStingException");
+            } catch (ReviewedStingException expected) {
+                /* Was supposed to throw an exception */
+            }
+
+            // Test exit != 0
+            job = new ProcessSettings(new String[] {"cat", "non_existent_file"});
+            int exitValue = controller.exec(job).getExitValue();
+            Assert.assertTrue(exitValue != 0, "'cat' non existent file returned 0");
+
+            // Test success
+            job = new ProcessSettings(new String[] {"echo", "Hello World"});
+            exitValue = controller.exec(job).getExitValue();
+            Assert.assertEquals(exitValue, 0, "Echo failed");
+        }
+    }
+
+    /**
+     * Adds one variable to a copy of the current environment, passes it to the job, and checks that
+     * {@code sh -c "echo $VAR"} prints the value followed by a line separator.
+     */
+    @Test
+    public void testEnvironment() {
+        String key = "MY_NEW_VAR";
+        String value = "value is here";
+
+        ProcessSettings job = new ProcessSettings(new String[] {"sh", "-c", "echo $"+key});
+        job.getStdoutSettings().setBufferSize(-1);
+        job.setRedirectErrorStream(true);
+
+        // System.getenv() is unmodifiable, so work on a mutable copy.
+        Map<String, String> env = new HashMap<String, String>(System.getenv());
+        env.put(key, value);
+        job.setEnvironment(env);
+
+        ProcessController controller = new ProcessController();
+        ProcessOutput result = controller.exec(job);
+        int exitValue = result.getExitValue();
+
+        Assert.assertEquals(exitValue, 0, "Echo environment variable failed");
+        Assert.assertEquals(result.getStdout().getBufferString(), value + NL, "Echo environment returned unexpected output");
+    }
+
+    /**
+     * Runs {@code pwd} with the job directory set to a fresh temporary directory and checks that the
+     * printed path matches. The directory is canonicalized first so the comparison uses the same
+     * path form on both sides; it is deleted afterwards.
+     *
+     * @throws IOException if the temporary directory cannot be canonicalized
+     */
+    @Test
+    public void testDirectory() throws IOException {
+        File dir = null;
+        try {
+            dir = IOUtils.tempDir("temp.", "").getCanonicalFile();
+
+            ProcessSettings job = new ProcessSettings(new String[] {"pwd"});
+            job.getStdoutSettings().setBufferSize(-1);
+            job.setRedirectErrorStream(true);
+            job.setDirectory(dir);
+
+            ProcessController controller = new ProcessController();
+            ProcessOutput result = controller.exec(job);
+            int exitValue = result.getExitValue();
+
+            Assert.assertEquals(exitValue, 0, "Getting working directory failed");
+
+            Assert.assertEquals(result.getStdout().getBufferString(), dir.getAbsolutePath() + NL,
+                    "Setting/getting working directory returned unexpected output");
+        } finally {
+            FileUtils.deleteQuietly(dir);
+        }
+    }
+
+    /**
+     * Feeds an in-memory string to {@code cat} via the stdin settings and checks that stdout echoes
+     * it back unchanged.
+     */
+    @Test
+    public void testReadStdInBuffer() {
+        String bufferText = "Hello from buffer";
+        ProcessSettings job = new ProcessSettings(new String[] {"cat"});
+        job.getStdoutSettings().setBufferSize(-1);
+        job.setRedirectErrorStream(true);
+        job.getStdinSettings().setInputBuffer(bufferText);
+
+        ProcessController controller = new ProcessController();
+        ProcessOutput output = controller.exec(job);
+
+        Assert.assertEquals(output.getStdout().getBufferString(), bufferText,
+                "Unexpected output from cat stdin buffer");
+    }
+
+    /**
+     * Feeds a temporary file to {@code cat} via the stdin settings and checks that stdout echoes the
+     * file content unchanged. The temporary file is deleted afterwards.
+     */
+    @Test
+    public void testReadStdInFile() {
+        File input = null;
+        try {
+            String fileText = "Hello from file";
+            input = IOUtils.writeTempFile(fileText, "stdin.", ".txt", null);
+
+            ProcessSettings job = new ProcessSettings(new String[] {"cat"});
+            job.getStdoutSettings().setBufferSize(-1);
+            job.setRedirectErrorStream(true);
+            job.getStdinSettings().setInputFile(input);
+
+            ProcessController controller = new ProcessController();
+            ProcessOutput output = controller.exec(job);
+
+            Assert.assertEquals(output.getStdout().getBufferString(), fileText,
+                    "Unexpected output from cat stdin file");
+        } finally {
+            FileUtils.deleteQuietly(input);
+        }
+    }
+
+    /**
+     * Visual smoke test only: runs two commands with stdout printing enabled and stderr redirected
+     * into stdout. Nothing is asserted; the output has to be inspected by eye.
+     */
+    @Test
+    public void testWriteStdOut() {
+        ProcessSettings job = new ProcessSettings(new String[] {"echo", "Testing to stdout"});
+        // Not going to call the System.setOut() for now. Just running a basic visual test.
+        job.getStdoutSettings().printStandard(true);
+        job.setRedirectErrorStream(true);
+
+        System.out.println("testWriteStdOut: Writing two lines to std out...");
+        ProcessController controller = new ProcessController();
+        controller.exec(job);
+        job.setCommand(new String[]{"cat", "non_existent_file"});
+        controller.exec(job);
+        System.out.println("testWriteStdOut: ...two lines should have been printed to std out");
+    }
+
+    /**
+     * With the error stream redirected, a failing {@code cat} must put its message in the stdout
+     * file and buffer (same length, not truncated) and leave the stderr file and buffer empty.
+     *
+     * @throws IOException if a temporary file cannot be created or read
+     */
+    @Test
+    public void testErrorToOut() throws IOException {
+        File outFile = null;
+        File errFile = null;
+        try {
+            outFile = BaseTest.createTempFile("temp", "");
+            errFile = BaseTest.createTempFile("temp", "");
+
+            ProcessSettings job = new ProcessSettings(new String[]{"cat", "non_existent_file"});
+            job.getStdoutSettings().setOutputFile(outFile);
+            job.getStdoutSettings().setBufferSize(-1);
+            job.getStderrSettings().setOutputFile(errFile);
+            job.getStderrSettings().setBufferSize(-1);
+            job.setRedirectErrorStream(true);
+
+            ProcessOutput result = new ProcessController().exec(job);
+            int exitValue = result.getExitValue();
+
+            Assert.assertTrue(exitValue != 0, "'cat' non existent file returned 0");
+
+            String fileString, bufferString;
+
+            fileString = FileUtils.readFileToString(outFile);
+            Assert.assertTrue(fileString.length() > 0, "Out file was length 0");
+
+            bufferString = result.getStdout().getBufferString();
+            Assert.assertTrue(bufferString.length() > 0, "Out buffer was length 0");
+
+            Assert.assertFalse(result.getStdout().isBufferTruncated(), "Out buffer was truncated");
+            Assert.assertEquals(bufferString.length(), fileString.length(), "Out buffer length did not match file length");
+
+            fileString = FileUtils.readFileToString(errFile);
+            Assert.assertEquals(fileString, "", "Unexpected output to err file");
+
+            bufferString = result.getStderr().getBufferString();
+            Assert.assertEquals(bufferString, "", "Unexepected output to err buffer");
+        } finally {
+            FileUtils.deleteQuietly(outFile);
+            FileUtils.deleteQuietly(errFile);
+        }
+    }
+
+    /**
+     * Mirror of {@link #testErrorToOut()}: without redirection, a failing {@code cat} must put its
+     * message in the stderr file and buffer (same length, not truncated) and leave the stdout file
+     * and buffer empty.
+     *
+     * @throws IOException if a temporary file cannot be created or read
+     */
+    @Test
+    public void testErrorToErr() throws IOException {
+        File outFile = null;
+        File errFile = null;
+        try {
+            outFile = BaseTest.createTempFile("temp", "");
+            errFile = BaseTest.createTempFile("temp", "");
+
+            ProcessSettings job = new ProcessSettings(new String[]{"cat", "non_existent_file"});
+            job.getStdoutSettings().setOutputFile(outFile);
+            job.getStdoutSettings().setBufferSize(-1);
+            job.getStderrSettings().setOutputFile(errFile);
+            job.getStderrSettings().setBufferSize(-1);
+            job.setRedirectErrorStream(false);
+
+            ProcessOutput result = new ProcessController().exec(job);
+            int exitValue = result.getExitValue();
+
+            Assert.assertTrue(exitValue != 0, "'cat' non existent file returned 0");
+
+            String fileString, bufferString;
+
+            fileString = FileUtils.readFileToString(errFile);
+            Assert.assertTrue(fileString.length() > 0, "Err file was length 0");
+
+            bufferString = result.getStderr().getBufferString();
+            Assert.assertTrue(bufferString.length() > 0, "Err buffer was length 0");
+
+            Assert.assertFalse(result.getStderr().isBufferTruncated(), "Err buffer was truncated");
+            Assert.assertEquals(bufferString.length(), fileString.length(), "Err buffer length did not match file length");
+
+            fileString = FileUtils.readFileToString(outFile);
+            Assert.assertEquals(fileString, "", "Unexpected output to out file");
+
+            bufferString = result.getStdout().getBufferString();
+            Assert.assertEquals(bufferString, "", "Unexepected output to out buffer");
+        } finally {
+            FileUtils.deleteQuietly(outFile);
+            FileUtils.deleteQuietly(errFile);
+        }
+    }
+
+    /** Text echoed by the truncation test. */
+    private static final String TRUNCATE_TEXT = "Hello World";
+    /** Bytes {@code echo} is expected to emit for {@link #TRUNCATE_TEXT}, including the line separator. */
+    private static final byte[] TRUNCATE_OUTPUT_BYTES = (TRUNCATE_TEXT + NL).getBytes();
+
+    /**
+     * Each row is {@code {truncateLen, expectedLen}}: the buffer size to configure and the number of
+     * output bytes expected to be kept.
+     *
+     * @return Test truncating content vs. not truncating (run at -1/+1 size)
+     */
+    @DataProvider(name = "truncateSizes")
+    public Object[][] getTruncateBufferSizes() {
+        int l = TRUNCATE_OUTPUT_BYTES.length;
+        return new Object[][]{
+                new Object[]{0, 0},
+                new Object[]{l, l},
+                new Object[]{l + 1, l},
+                new Object[]{l - 1, l - 1}
+        };
+    }
+
+    /**
+     * Echoes {@link #TRUNCATE_TEXT} with the stdout buffer limited to {@code truncateLen} bytes and
+     * checks the captured bytes and the truncation flag.
+     *
+     * @param truncateLen buffer size to configure on the job's stdout settings
+     * @param expectedLen number of leading output bytes expected in the buffer
+     */
+    @Test(dataProvider = "truncateSizes")
+    public void testTruncateBuffer(int truncateLen, int expectedLen) {
+        byte[] expected = Arrays.copyOf(TRUNCATE_OUTPUT_BYTES, expectedLen);
+
+        String[] command = {"echo", TRUNCATE_TEXT};
+        ProcessController controller = new ProcessController();
+
+        ProcessSettings job = new ProcessSettings(command);
+        job.getStdoutSettings().setBufferSize(truncateLen);
+        ProcessOutput result = controller.exec(job);
+
+        int exitValue = result.getExitValue();
+
+        Assert.assertEquals(exitValue, 0,
+                String.format("Echo returned %d: %s", exitValue, TRUNCATE_TEXT));
+
+        byte[] bufferBytes = result.getStdout().getBufferBytes();
+
+        Assert.assertEquals(bufferBytes, expected,
+                String.format("Output buffer didn't match (%d vs %d)", expected.length, bufferBytes.length));
+
+        boolean truncated = result.getStdout().isBufferTruncated();
+
+        // Truncation is expected exactly when the full output does not fit in the buffer.
+        Assert.assertEquals(truncated, TRUNCATE_OUTPUT_BYTES.length > truncateLen,
+                "Unexpected buffer truncation result");
+    }
+
+    /** Arguments of the long command, built once by {@code getLongCommand()}. */
+    private static final String[] LONG_COMMAND = getLongCommand();
+    /** {@link #LONG_COMMAND} joined with single spaces. */
+    private static final String LONG_COMMAND_STRING = StringUtils.join(LONG_COMMAND, " ");
+    /** Value returned by {@code toString()} of the long-command test cases instead of the full command. */
+    private static final String LONG_COMMAND_DESCRIPTION = "";
+
+    /**
+     * Builds the cases for {@link #testEcho(EchoCommand)}: plain and unbalanced-quote arguments, plus
+     * one very long argument list.
+     *
+     * NOTE(review): the {@code EchoCommand} instances are created and discarded; presumably the
+     * constructor registers them so that {@code TestDataProvider.getTests} can return them. Confirm
+     * against the EchoCommand declaration, which is outside this section.
+     *
+     * @return the rows produced by {@code TestDataProvider.getTests(EchoCommand.class)}
+     */
+    @DataProvider(name = "echoCommands")
+    public Object[][] getEchoCommands() {
+
+        new EchoCommand(new String[]{"echo", "Hello", "World"}, "Hello World" + NL);
+        new EchoCommand(new String[]{"echo", "'Hello", "World"}, "'Hello World" + NL);
+        new EchoCommand(new String[]{"echo", "Hello", "World'"}, "Hello World'" + NL);
+        new EchoCommand(new String[]{"echo", "'Hello", "World'"}, "'Hello World'" + NL);
+
+        // Prepend "echo" to the long argument list.
+        String[] longCommand = new String[LONG_COMMAND.length + 1];
+        longCommand[0] = "echo";
+        System.arraycopy(LONG_COMMAND, 0, longCommand, 1, LONG_COMMAND.length);
+        new EchoCommand(longCommand, LONG_COMMAND_STRING + NL) {
+            @Override
+            public String toString() {
+                return LONG_COMMAND_DESCRIPTION;
+            }
+        };
+
+        return TestDataProvider.getTests(EchoCommand.class);
+    }
+
+    /**
+     * Runs the command array directly (no shell) and, when an expected output is given, checks that
+     * the output file and the stdout buffer both match it and that the buffer was not truncated.
+     * The buffer is sized to exactly the expected output's byte length.
+     *
+     * @param script command array and expected stdout
+     * @throws IOException if the temporary output file cannot be created or read
+     */
+    @Test(dataProvider = "echoCommands")
+    public void testEcho(EchoCommand script) throws IOException {
+        File outputFile = null;
+        try {
+            outputFile = BaseTest.createTempFile("temp", "");
+
+            ProcessSettings job = new ProcessSettings(script.command);
+            if (script.output != null) {
+                job.getStdoutSettings().setOutputFile(outputFile);
+                job.getStdoutSettings().setBufferSize(script.output.getBytes().length);
+            }
+
+            ProcessOutput result = new ProcessController().exec(job);
+            int exitValue = result.getExitValue();
+
+            Assert.assertEquals(exitValue, 0,
+                    String.format("Echo returned %d: %s", exitValue, script));
+
+            if (script.output != null) {
+
+                String fileString = FileUtils.readFileToString(outputFile);
+                Assert.assertEquals(fileString, script.output,
+                        String.format("Output file didn't match (%d vs %d): %s",
+                                fileString.length(), script.output.length(), script));
+
+                String bufferString = result.getStdout().getBufferString();
+                Assert.assertEquals(bufferString, script.output,
+                        String.format("Output content didn't match (%d vs %d): %s",
+                                bufferString.length(), script.output.length(), script));
+
+                Assert.assertFalse(result.getStdout().isBufferTruncated(),
+                        "Output content was truncated: " + script);
+            }
+        } finally {
+            FileUtils.deleteQuietly(outputFile);
+        }
+    }
+
+    /** A command that does not exist must make {@code exec} throw a {@link ReviewedStingException}. */
+    @Test(expectedExceptions = ReviewedStingException.class)
+    public void testUnableToStart() {
+        ProcessSettings job = new ProcessSettings(new String[]{"no_such_command"});
+        new ProcessController().exec(job);
+    }
+
+    /**
+     * Builds the cases for {@link #testScript(ScriptCommand)}. Each case gives whether the script is
+     * expected to exit with status zero, the script text, and the expected stdout ({@code null} to
+     * skip the output checks). Covers balanced and unbalanced quotes, a missing command, shell
+     * comment characters, and one very long command line.
+     *
+     * NOTE(review): as with the echo cases, the instances are created and discarded; presumably the
+     * constructor registers them for {@code TestDataProvider.getTests}. Confirm against the
+     * ScriptCommand declaration, which is outside this section.
+     *
+     * @return the rows produced by {@code TestDataProvider.getTests(ScriptCommand.class)}
+     */
+    @DataProvider(name = "scriptCommands")
+    public Object[][] getScriptCommands() {
+        new ScriptCommand(true, "echo Hello World", "Hello World" + NL);
+        new ScriptCommand(false, "echo 'Hello World", null);
+        new ScriptCommand(false, "echo Hello World'", null);
+        new ScriptCommand(true, "echo 'Hello World'", "Hello World" + NL);
+        new ScriptCommand(true, "echo \"Hello World\"", "Hello World" + NL);
+        new ScriptCommand(false, "no_such_echo Hello World", null);
+        new ScriptCommand(true, "echo #", NL);
+        new ScriptCommand(true, "echo \\#", "#" + NL);
+        new ScriptCommand(true, "echo \\\\#", "\\#" + NL);
+
+        new ScriptCommand(true, "echo " + LONG_COMMAND_STRING, LONG_COMMAND_STRING + NL) {
+            @Override
+            public String toString() {
+                return LONG_COMMAND_DESCRIPTION;
+            }
+        };
+
+        return TestDataProvider.getTests(ScriptCommand.class);
+    }
+
+    /**
+     * Writes the script text to a temporary file, runs it with {@code sh}, and checks that success or
+     * failure matches the expectation. When an expected output is given, also checks the output file,
+     * the stdout buffer, and that the buffer was not truncated. Both temporary files are deleted
+     * afterwards.
+     *
+     * @param script script text, expected success flag and expected stdout
+     * @throws IOException if a temporary file cannot be created or read
+     */
+    @Test(dataProvider = "scriptCommands")
+    public void testScript(ScriptCommand script) throws IOException {
+        File scriptFile = null;
+        File outputFile = null;
+        try {
+            scriptFile = writeScript(script.content);
+            outputFile = BaseTest.createTempFile("temp", "");
+
+            ProcessSettings job = new ProcessSettings(new String[]{"sh", scriptFile.getAbsolutePath()});
+            if (script.output != null) {
+                job.getStdoutSettings().setOutputFile(outputFile);
+                job.getStdoutSettings().setBufferSize(script.output.getBytes().length);
+            }
+
+            ProcessOutput result = new ProcessController().exec(job);
+            int exitValue = result.getExitValue();
+
+            Assert.assertEquals(exitValue == 0, script.succeed,
+                    String.format("Script returned %d: %s", exitValue, script));
+
+            if (script.output != null) {
+
+                String fileString = FileUtils.readFileToString(outputFile);
+                Assert.assertEquals(fileString, script.output,
+                        String.format("Output file didn't match (%d vs %d): %s",
+                                fileString.length(), script.output.length(), script));
+
+                String bufferString = result.getStdout().getBufferString();
+                Assert.assertEquals(bufferString, script.output,
+                        String.format("Output content didn't match (%d vs %d): %s",
+                                bufferString.length(), script.output.length(), script));
+
+                Assert.assertFalse(result.getStdout().isBufferTruncated(),
+                        "Output content was truncated: " + script);
+            }
+        } finally {
+            FileUtils.deleteQuietly(scriptFile);
+            FileUtils.deleteQuietly(outputFile);
+        }
+    }
+
+    private static String[] getLongCommand() {
+        // This command fails on some systems with a 4096 character limit when run via the old sh -c "echo ...",
+        // but works on the same systems when run via sh