From 91cb6547911b2564cd2843e787a08805c84bd83b Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Tue, 17 Apr 2012 11:45:32 -0400 Subject: [PATCH] AggregateMetrics: - By porting from jython to java now accessible to Queue via automatic extension generation. - Better handling for problematic sample names by using PicardAggregationUtils. GATKReportTable looks up keys using arrays instead of dot-separated strings, which is useful when a sample has a period in the name. CombineVariants has option to suppress the header with the command line, which is now invoked during VCF gathering. Added SelectHeaders walker for filtering headers for dbGAP submission. Generated command line for read filters now correctly prefixes the argument name as --read_filter instead of -read_filter. Latest WholeGenomePipeline. Other minor cleanup to utility methods. --- .../sting/gatk/io/stubs/VCFWriterStub.java | 50 ++-- .../sting/gatk/report/GATKReportTable.java | 59 ++--- .../walkers/variantutils/CombineVariants.java | 10 +- .../walkers/variantutils/SelectHeaders.java | 250 ++++++++++++++++++ .../gatk/GATKExtensionsGenerator.java | 3 +- .../broadinstitute/sting/utils/R/RUtils.java | 90 +++++++ .../sting/utils/SampleUtils.java | 99 +------ .../sting/utils/codecs/vcf/VCFHeader.java | 73 ++++- .../sting/utils/text/ListFileUtils.java | 176 +++++++++++- .../sting/utils/text/XReadLines.java | 136 ++++++---- .../sting/gatk/report/GATKReportUnitTest.java | 47 +++- .../sting/utils/R/RUtilsUnitTest.java | 64 +++++ .../utils/text/ListFileUtilsUnitTest.java | 77 +++++- .../qscripts/examples/ExampleReadFilter.scala | 47 ++++ .../queue/extensions/gatk/GATKIntervals.scala | 1 - .../extensions/gatk/VcfGatherFunction.scala | 3 +- .../sting/queue/pipeline/PipelineTest.scala | 2 +- .../ExampleReadFilterPipelineTest.scala | 90 +++++++ 18 files changed, 1050 insertions(+), 227 deletions(-) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java create mode 100644 
public/java/src/org/broadinstitute/sting/utils/R/RUtils.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/R/RUtilsUnitTest.java create mode 100644 public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleReadFilter.scala create mode 100644 public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 82cb43634..94051cc7f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,7 +12,6 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -99,8 +98,13 @@ public class VCFWriterStub implements Stub, VCFWriter { /** * Create a new stub given the requested file. + * + * @param engine engine. * @param genotypeFile file to (ultimately) create. * @param isCompressed should we compress the output stream? + * @param argumentSources sources. + * @param skipWritingHeader skip writing header. + * @param doNotWriteGenotypes do not write genotypes. 
*/ public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) { this.engine = engine; @@ -114,8 +118,13 @@ public class VCFWriterStub implements Stub, VCFWriter { /** * Create a new stub given the requested file. + * + * @param engine engine. * @param genotypeStream stream to (ultimately) write. * @param isCompressed should we compress the output stream? + * @param argumentSources sources. + * @param skipWritingHeader skip writing header. + * @param doNotWriteGenotypes do not write genotypes. */ public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) { this.engine = engine; @@ -154,7 +163,7 @@ public class VCFWriterStub implements Stub, VCFWriter { /** * Gets the master sequence dictionary from the engine associated with this stub * @link GenomeAnalysisEngine.getMasterSequenceDictionary - * @return + * @return the master sequence dictionary from the engine associated with this stub */ public SAMSequenceDictionary getMasterSequenceDictionary() { return engine.getMasterSequenceDictionary(); @@ -188,22 +197,25 @@ public class VCFWriterStub implements Stub, VCFWriter { vcfHeader = header; // Check for the command-line argument header line. If not present, add it in. 
- if ( !skipWritingHeader ) { - VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); - boolean foundCommandLineHeaderLine = false; - for (VCFHeaderLine line: vcfHeader.getMetaData()) { - if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) ) - foundCommandLineHeaderLine = true; + if (!skipWritingHeader && header.isWriteEngineHeaders()) { + + if (header.isWriteCommandLine()) { + VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); + boolean foundCommandLineHeaderLine = false; + for (VCFHeaderLine line: vcfHeader.getMetaData()) { + if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) ) + foundCommandLineHeaderLine = true; + } + if ( !foundCommandLineHeaderLine ) + vcfHeader.addMetaDataLine(commandLineArgHeaderLine); } - if ( !foundCommandLineHeaderLine ) - vcfHeader.addMetaDataLine(commandLineArgHeaderLine); // also put in the reference contig header lines String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName()); for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() ) vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly)); - vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath())); + vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + engine.getArguments().referenceFile.getAbsolutePath())); } outputTracker.getStorage(this).writeHeader(vcfHeader); @@ -225,7 +237,7 @@ public class VCFWriterStub implements Stub, VCFWriter { /** * Gets a string representation of this object. - * @return + * @return a string representation of this object. 
*/ @Override public String toString() { @@ -247,20 +259,20 @@ public class VCFWriterStub implements Stub, VCFWriter { val = String.format("", contig.getSequenceName(), contig.getSequenceLength(), assembly); else val = String.format("", contig.getSequenceName(), contig.getSequenceLength()); - return new VCFHeaderLine("contig", val); + return new VCFHeaderLine(VCFHeader.CONTIG_KEY, val); } private String getReferenceAssembly(String refPath) { // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot String assembly = null; - if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 ) + if (refPath.contains("b37") || refPath.contains("v37")) assembly = "b37"; - else if ( refPath.indexOf("b36") != -1 ) + else if (refPath.contains("b36")) assembly = "b36"; - else if ( refPath.indexOf("hg18") != -1 ) + else if (refPath.contains("hg18")) assembly = "hg18"; - else if ( refPath.indexOf("hg19") != -1 ) + else if (refPath.contains("hg19")) assembly = "hg19"; return assembly; } -} \ No newline at end of file +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 58002bd14..6551bf376 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -250,53 +250,40 @@ public class GATKReportTable { } /** - * Returns the first primary key matching the dotted column values. - * Ex: dbsnp.eval.called.all.novel.all - * - * @param dottedColumnValues Period concatenated values. + * Returns the first primary key matching the column values. + * Ex: "CountVariants", "dbsnp", "eval", "called", "all", "novel", "all" + * @param columnValues column values. * @return The first primary key matching the column values or throws an exception. 
*/ - public Object getPrimaryKeyByData(String dottedColumnValues) { - Object key = findPrimaryKey(dottedColumnValues); + public Object getPrimaryKeyByData(Object... columnValues) { + Object key = findPrimaryKeyByData(columnValues); if (key == null) - throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues); + throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + Arrays.asList(columnValues)); return key; } - /** - * Returns true if there is at least on row with the dotted column values. - * Ex: dbsnp.eval.called.all.novel.all - * - * @param dottedColumnValues Period concatenated values. - * @return true if there is at least one row matching the columns. - */ - public boolean containsPrimaryKey(String dottedColumnValues) { - return findPrimaryKey(dottedColumnValues) != null; - } - - /** - * Returns the first primary key matching the dotted column values. - * Ex: dbsnp.eval.called.all.novel.all - * - * @param dottedColumnValues Period concatenated values. - * @return The first primary key matching the column values or null. - */ - private Object findPrimaryKey(String dottedColumnValues) { - return findPrimaryKey(dottedColumnValues.split("\\.")); - } - /** * Returns the first primary key matching the column values. - * Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" } + * Ex: "CountVariants", "dbsnp", "eval", "called", "all", "novel", "all" * * @param columnValues column values. - * @return The first primary key matching the column values. + * @return The first primary key matching the column values or null if the key does not exist. */ - private Object findPrimaryKey(Object[] columnValues) { + public Object findPrimaryKeyByData(Object... 
columnValues) { + if (columnValues == null) + throw new NullPointerException("Column values is null"); + if (columnValues.length == 0) + throw new IllegalArgumentException("Column values is empty"); + int columnCount = columns.size(); for (Object primaryKey : primaryKeyColumn) { boolean matching = true; - for (int i = 0; matching && i < columnValues.length; i++) { - matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i + 1)); + // i --> index into columnValues parameter + // j --> index into columns collection + for (int i = 0, j = 0; matching && i < columnValues.length && j < columnCount; j++) { + if (!columns.getByIndex(j).isDisplayable()) + continue; + matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i)); + i++; } if (matching) return primaryKey; @@ -360,8 +347,8 @@ public class GATKReportTable { * output file), and the format string used to display the data. * * @param columnName the name of the column - * @param defaultValue the default value of a blank cell - * @param display if true - the column will be displayed; if false - the column will be hidden + * @param defaultValue the default value of a blank cell + * @param display if true - the column will be displayed; if false - the column will be hidden * @param format the format string used to display data */ public void addColumn(String columnName, Object defaultValue, boolean display, String format) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 3066b0bc6..18b8424b2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -157,6 +157,12 @@ public class CombineVariants extends RodWalker { @Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if the variant is present in at 
least N input files.", required=false) public int minimumN = 1; + /** + * This option allows the suppression of the command line in the VCF header. This is most often useful when combining variants from dozens or hundreds of smaller VCFs. + */ + @Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false) + public boolean SUPPRESS_COMMAND_LINE_HEADER = false; + @Hidden @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false) public boolean MERGE_INFO_WITH_MAX_AC = false; @@ -183,7 +189,9 @@ public class CombineVariants extends RodWalker { Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); if ( SET_KEY != null ) headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants")); - vcfWriter.writeHeader(new VCFHeader(headerLines, sitesOnlyVCF ? Collections.emptySet() : samples)); + VCFHeader vcfHeader = new VCFHeader(headerLines, sitesOnlyVCF ? 
Collections.emptySet() : samples); + vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER); + vcfWriter.writeHeader(vcfHeader); if ( vcfWriter instanceof VCFWriterStub) { sitesOnlyVCF = ((VCFWriterStub)vcfWriter).doNotWriteGenotypes(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java new file mode 100755 index 000000000..714fb938e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.apache.commons.io.FilenameUtils; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.text.ListFileUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.io.File; +import java.util.*; + +/** + * Selects headers from a VCF source. + *

+ *

+ * Often, a VCF containing many headers will need to be subset in order to facilitate certain formatting guidelines. + * SelectHeaders can be used for this purpose. Given a single VCF file, one or more headers can be extracted from the + * file (based on a complete header name or a pattern match). + *

+ *

Input

+ *

+ * A single VCF. + *

+ *

+ *

Output

+ *

+ * A header selected VCF. + *

+ *

+ *

Examples

+ *
+ * Select only the FILTER, FORMAT, and INFO headers:
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T SelectHeaders \
+ *   --variant input.vcf \
+ *   -o output.vcf \
+ *   -hn FILTER \
+ *   -hn FORMAT \
+ *   -hn INFO
+ *
+ * Select only the FILTER, FORMAT, and INFO headers and add in the reference file name and the interval names:
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T SelectHeaders \
+ *   --variant input.vcf \
+ *   -o output.vcf \
+ *   -hn FILTER \
+ *   -hn FORMAT \
+ *   -hn INFO \
+ *   -irn \
+ *   -iln
+ *
+ * Select only the FILTER, FORMAT, and INFO headers, plus any headers with SnpEff:
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T SelectHeaders \
+ *   --variant input.vcf \
+ *   -o output.vcf \
+ *   -hn FILTER \
+ *   -hn FORMAT \
+ *   -hn INFO \
+ *   -he '.*SnpEff.*'
+ * 
+ */ +@SuppressWarnings("unused") +public class SelectHeaders extends RodWalker implements TreeReducible { + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + + @Output(doc = "File to which variants should be written", required = true) + protected VCFWriter vcfWriter; + + @Argument(fullName = "header_name", shortName = "hn", doc = "Include header. Can be specified multiple times", required = false) + public Set headerNames; + + @Argument(fullName = "header_expression", shortName = "he", doc = "Regular expression to select many headers from the tracks provided. Can be specified multiple times", required = false) + public Set headerExpressions; + + /** + * Note that header exclusion takes precedence over inclusion, so that if a header is in both lists it will be excluded. + */ + @Argument(fullName = "exclude_header_name", shortName = "xl_hn", doc = "Exclude header. Can be specified multiple times", required = false) + public Set XLheaderNames; + + /** + * Note that reference inclusion takes precedence over other header matching. If set other reference lines may be excluded but the file name will still be added. + */ + @Argument(fullName = "include_reference_name", shortName = "irn", doc = "If set the reference file name minus the file extension will be added to the headers", required = false) + public boolean includeReference; + + /** + * Note that interval name inclusion takes precedence over other header matching. If set other interval lines may be excluded but the intervals will still be added. + */ + @Argument(fullName = "include_interval_names", shortName = "iln", doc = "If set the interval file name minus the file extension, or the command line intervals, will be added to the headers", required = false) + public boolean includeIntervals; + + /** + * Note that engine header inclusion takes precedence over other header matching. 
If set other engine lines may be excluded but the engine headers will still be added. + */ + @Hidden // TODO: Determine if others find this valuable and either remove @Hidden or remove -ieh. + @Argument(fullName = "include_engine_headers", shortName = "ieh", doc = "If set the headers normally output by the engine will be added to the headers", required = false) + public boolean includeEngineHeaders; + + private static final ListFileUtils.StringConverter headerKey = new ListFileUtils.StringConverter() { + @Override + public String convert(VCFHeaderLine value) { + return value.getKey(); + } + }; + + /** + * Set up the VCF writer, the header expressions and regexps + */ + @Override + public void initialize() { + // Get list of samples to include in the output + List rodNames = Arrays.asList(variantCollection.variants.getName()); + + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); + + headerLines.add(new VCFHeaderLine(VCFHeader.SOURCE_KEY, "SelectHeaders")); + + // Select only the headers requested by name or expression. + headerLines = new LinkedHashSet(getSelectedHeaders(headerLines)); + + // Optionally add in the reference. + if (includeReference && getToolkit().getArguments().referenceFile != null) + headerLines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, FilenameUtils.getBaseName(getToolkit().getArguments().referenceFile.getName()))); + + // Optionally add in the intervals. 
+ if (includeIntervals && getToolkit().getArguments().intervals != null) { + for (IntervalBinding intervalBinding : getToolkit().getArguments().intervals) { + String source = intervalBinding.getSource(); + if (source == null) + continue; + File file = new File(source); + if (file.exists()) { + headerLines.add(new VCFHeaderLine(VCFHeader.INTERVALS_KEY, FilenameUtils.getBaseName(file.getName()))); + } else { + headerLines.add(new VCFHeaderLine(VCFHeader.INTERVALS_KEY, source)); + } + } + } + + TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); + VCFHeader vcfHeader = new VCFHeader(headerLines, vcfSamples); + vcfHeader.setWriteEngineHeaders(includeEngineHeaders); + vcfWriter.writeHeader(vcfHeader); + } + + private Set getSelectedHeaders(Set headerLines) { + Set selectedHeaders = new TreeSet(); + if (headerNames == null && headerExpressions == null) { + // Include everything if nothing was explicitly included. + selectedHeaders.addAll(headerLines); + } else { + // Only include the selected headers. + if (headerNames != null) + selectedHeaders.addAll(ListFileUtils.includeMatching(headerLines, headerKey, headerNames, true)); + if (headerExpressions != null) + selectedHeaders.addAll(ListFileUtils.includeMatching(headerLines, headerKey, headerExpressions, false)); + } + + // Remove any excluded headers. 
+ if (XLheaderNames != null) + selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true); + return selectedHeaders; + } + + /** + * Pass through the VC record + * + * @param tracker the ROD tracker + * @param ref reference information + * @param context alignment info + * @return number of records processed + */ + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + int count = 0; + if (tracker != null) { + Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation()); + if (vcs != null) { + for (VariantContext vc : vcs) { + vcfWriter.add(vc); + count++; + } + } + } + return count; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + @Override + public Integer treeReduce(Integer lhs, Integer rhs) { + return lhs + rhs; + } + + @Override + public void onTraversalDone(Integer result) { + logger.info(result + " records processed."); + } +} diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index a3f80af1c..dcdef5aab 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -194,6 +194,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { */ private static final List gatkPackages = Arrays.asList( "org.broadinstitute.sting.gatk", + "org.broadinstitute.sting.pipeline", "org.broadinstitute.sting.analyzecovariates", "org.broadinstitute.sting.gatk.datasources.reads.utilities"); @@ -251,7 +252,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { */ private void writeFilter(String className, List argumentFields, Set> 
dependents) throws IOException { String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction", - className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents); + className, "", false, String.format(" + \" --read_filter %s\"", className), argumentFields, dependents); writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content); } diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RUtils.java b/public/java/src/org/broadinstitute/sting/utils/R/RUtils.java new file mode 100644 index 000000000..b52eed5cf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/R/RUtils.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.R; + +import org.apache.commons.lang.StringUtils; + +import java.text.SimpleDateFormat; +import java.util.Collection; +import java.util.Date; + +public class RUtils { + /** + * Converts a collection of values to an R compatible list. A null list will return NA, + * otherwise the values will be escaped with single quotes and combined with c(). + * @param list Collection of values + * @return The R representation of the list + */ + public static String toStringList(Collection list) { + if (list == null) + return "NA"; + if (list.size() == 0) + return "c()"; + return "c('" + StringUtils.join(list, "','") + "')"; + } + + /** + * Converts a collection of values to an R compatible list. A null list will return NA, + * otherwise the values will be combined with c(). + * @param list Collection of values + * @return The R representation of the list + */ + public static String toNumberList(Collection list) { + return list == null ? "NA": "c(" + StringUtils.join(list, ",") + ")"; + } + + /** + * Converts a collection of values to an R compatible list. A null list will return NA, + * otherwise the date will be escaped with single quotes and combined with c(). + * @param list Collection of values + * @return The R representation of the list + */ + public static String toDateList(Collection list) { + return toDateList(list, "''yyyy-MM-dd''"); + } + + /** + * Converts a collection of values to an R compatible list formatted by pattern. 
+ * @param list Collection of values + * @param pattern format pattern string for each date + * @return The R representation of the list + */ + public static String toDateList(Collection list, String pattern) { + + if (list == null) + return "NA"; + SimpleDateFormat format = new SimpleDateFormat(pattern); + StringBuilder sb = new StringBuilder(); + sb.append("c("); + boolean first = true; + for (Date date : list) { + if (!first) sb.append(","); + sb.append(format.format(date)); + first = false; + } + sb.append(")"); + return sb.toString(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java index 68b220aab..360a855fa 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -31,14 +31,13 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** @@ -74,10 +73,10 @@ public class SampleUtils { * Same as @link getSAMFileSamples but gets all of the samples * in the SAM files loaded by the engine * - * @param engine - * @return + * @param engine engine + * @return samples */ - public final static Set getSAMFileSamples(GenomeAnalysisEngine engine) { + public static Set getSAMFileSamples(GenomeAnalysisEngine engine) { return SampleUtils.getSAMFileSamples(engine.getSAMFileHeader()); } @@ -209,89 +208,24 @@ public class SampleUtils { * we try to read a file named E from disk, and if 
possible all lines from that file are expanded * into unique sample names. * - * @param sampleArgs - * @return + * @param sampleArgs args + * @return samples */ public static Set getSamplesFromCommandLineInput(Collection sampleArgs) { if (sampleArgs != null) { - // Let's first go through the list and see if we were given any files. We'll add every entry in the file to our - // sample list set, and treat the entries as if they had been specified on the command line. - Set samplesFromFiles = new HashSet(); - for (String SAMPLE_EXPRESSION : sampleArgs) { - File sampleFile = new File(SAMPLE_EXPRESSION); - - try { - XReadLines reader = new XReadLines(sampleFile); - - List lines = reader.readLines(); - for (String line : lines) { - samplesFromFiles.add(line.trim()); - } - } catch (FileNotFoundException e) { - samplesFromFiles.add(SAMPLE_EXPRESSION); // not a file, so must be a sample - } - } - - return samplesFromFiles; + return ListFileUtils.unpackSet(sampleArgs); } return new HashSet(); } public static Set getSamplesFromCommandLineInput(Collection vcfSamples, Collection sampleExpressions) { - Set samples = new HashSet(); - - if (sampleExpressions != null) { - // Let's first go through the list and see if we were given any files. We'll add every entry in the file to our - // sample list set, and treat the entries as if they had been specified on the command line. - Set samplesFromFiles = new HashSet(); - for (String sampleExpression : sampleExpressions) { - File sampleFile = new File(sampleExpression); - - try { - XReadLines reader = new XReadLines(sampleFile); - - List lines = reader.readLines(); - for (String line : lines) { - samplesFromFiles.add(line); - } - } catch (FileNotFoundException e) { - // ignore exception - } - } - - sampleExpressions.addAll(samplesFromFiles); - - // Let's now assume that the values in sampleExpressions are literal sample names and not regular - // expressions. 
Extract those samples specifically so we don't make the mistake of selecting more - // than what the user really wants. - Set possibleSampleRegexs = new HashSet(); - for (String sampleExpression : sampleExpressions) { - if (!(new File(sampleExpression).exists())) { - if (vcfSamples.contains(sampleExpression)) { - samples.add(sampleExpression); - } else { - possibleSampleRegexs.add(sampleExpression); - } - } - } - - // Now, check the expressions that weren't used in the previous step, and use them as if they're regular expressions - for (String sampleRegex : possibleSampleRegexs) { - Pattern p = Pattern.compile(sampleRegex); - - for (String vcfSample : vcfSamples) { - Matcher m = p.matcher(vcfSample); - if (m.find()) { - samples.add(vcfSample); - } - } - } + Set samples = ListFileUtils.unpackSet(vcfSamples); + if (sampleExpressions == null) { + return samples; } else { - samples.addAll(vcfSamples); + return ListFileUtils.includeMatching(samples, sampleExpressions, false); } - - return samples; } /** @@ -304,16 +238,7 @@ public class SampleUtils { // Now, check the expressions that weren't used in the previous step, and use them as if they're regular expressions Set samples = new HashSet(); if (sampleExpressions != null) { - for (String expression : sampleExpressions) { - Pattern p = Pattern.compile(expression); - - for (String originalSample : originalSamples) { - Matcher m = p.matcher(originalSample); - if (m.find()) { - samples.add(originalSample); - } - } - } + samples.addAll(ListFileUtils.includeMatching(originalSamples, sampleExpressions, false)); } return samples; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 27bab8c41..50ff3a656 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -1,5 +1,28 @@ -package 
org.broadinstitute.sting.utils.codecs.vcf; +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.utils.codecs.vcf; import org.broad.tribble.util.ParsingUtils; @@ -35,6 +58,11 @@ public class VCFHeader { // the header string indicator public static final String HEADER_INDICATOR = "#"; + public static final String SOURCE_KEY = "source"; + public static final String REFERENCE_KEY = "reference"; + public static final String CONTIG_KEY = "contig"; + public static final String INTERVALS_KEY = "intervals"; + // were the input samples sorted originally (or are we sorting them)? 
private boolean samplesWereAlreadySorted = true; @@ -42,6 +70,8 @@ public class VCFHeader { protected ArrayList sampleNamesInOrder = null; protected HashMap sampleNameToOffset = null; + private boolean writeEngineHeaders = true; + private boolean writeCommandLine = true; /** * create a VCF header, given a list of meta data and auxillary tags @@ -79,6 +109,7 @@ public class VCFHeader { * using this header (i.e., read by the VCFCodec) will have genotypes * occurring in the same order * + * @param genotypeSampleNamesInAppearenceOrder genotype sample names */ protected void buildVCFReaderMaps(List genotypeSampleNamesInAppearenceOrder) { @@ -144,10 +175,7 @@ public class VCFHeader { * @return a set of the header fields, in order */ public Set getHeaderFields() { - Set fields = new LinkedHashSet(); - for (HEADER_FIELDS field : HEADER_FIELDS.values()) - fields.add(field); - return fields; + return new LinkedHashSet(Arrays.asList(HEADER_FIELDS.values())); } /** @@ -217,7 +245,36 @@ public class VCFHeader { public VCFHeaderLine getOtherHeaderLine(String key) { return mOtherMetaData.get(key); } + + /** + * If true additional engine headers will be written to the VCF, otherwise only the walker headers will be output. + * @return true if additional engine headers will be written to the VCF + */ + public boolean isWriteEngineHeaders() { + return writeEngineHeaders; + } + + /** + * If true additional engine headers will be written to the VCF, otherwise only the walker headers will be output. + * @param writeEngineHeaders true if additional engine headers will be written to the VCF + */ + public void setWriteEngineHeaders(boolean writeEngineHeaders) { + this.writeEngineHeaders = writeEngineHeaders; + } + + /** + * If true, and isWriteEngineHeaders also returns true, the command line will be written to the VCF. 
+ * @return true if the command line will be written to the VCF + */ + public boolean isWriteCommandLine() { + return writeCommandLine; + } + + /** + * If true, and isWriteEngineHeaders also returns true, the command line will be written to the VCF. + * @param writeCommandLine true if the command line will be written to the VCF + */ + public void setWriteCommandLine(boolean writeCommandLine) { + this.writeCommandLine = writeCommandLine; + } } - - - diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index c146bf4d4..a3bc7a75f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -34,9 +34,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; +import java.io.IOException; +import java.util.*; +import java.util.regex.Pattern; /** * A collection of convenience methods for working with list files. @@ -54,6 +54,7 @@ public class ListFileUtils { * LIST_FILE_COMMENT_START are ignored. * * @param samFiles The sam files, in string format. + * @param parser Parser * @return a flattened list of the bam files provided */ public static List unpackBAMFileList(final List samFiles, final ParsingEngine parser) { @@ -63,10 +64,8 @@ public class ListFileUtils { inputFileName = expandFileName(inputFileName); if (inputFileName.toLowerCase().endsWith(".list") ) { try { - for ( String fileName : new XReadLines(new File(inputFileName), true) ) { - if ( fileName.length() > 0 && ! 
fileName.startsWith(LIST_FILE_COMMENT_START) ) { - unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); - } + for ( String fileName : new XReadLines(new File(inputFileName), true, LIST_FILE_COMMENT_START) ) { + unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); } } catch( FileNotFoundException ex ) { @@ -91,9 +90,11 @@ public class ListFileUtils { /** * Convert command-line argument representation of ROD bindings to something more easily understandable by the engine. * @param RODBindings a text equivale + * @param parser Parser * @return a list of expanded, bound RODs. */ @Deprecated + @SuppressWarnings("unused") // TODO: Who is still using this? External walkers? public static Collection unpackRODBindingsOldStyle(final Collection RODBindings, final ParsingEngine parser) { // todo -- this is a strange home for this code. Move into ROD system Collection rodBindings = new ArrayList(); @@ -112,7 +113,7 @@ public class ListFileUtils { String name = positionalTags.get(0); String type = positionalTags.get(1); - RMDTriplet.RMDStorageType storageType = null; + RMDTriplet.RMDStorageType storageType; if(tags.getValue("storage") != null) storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,tags.getValue("storage")); else if(fileName.toLowerCase().endsWith("stdin")) @@ -129,9 +130,11 @@ public class ListFileUtils { /** * Convert command-line argument representation of ROD bindings to something more easily understandable by the engine. * @param RODBindings a text equivale + * @param parser Parser * @return a list of expanded, bound RODs. */ - public static Collection unpackRODBindings(final Collection RODBindings, final ParsingEngine parser) { + @SuppressWarnings("unchecked") + public static Collection unpackRODBindings(final Collection RODBindings, @SuppressWarnings("unused") final ParsingEngine parser) { // todo -- this is a strange home for this code. 
Move into ROD system Collection rodBindings = new ArrayList(); FeatureManager builderForValidation = new FeatureManager(); @@ -142,7 +145,7 @@ public class ListFileUtils { String name = rodBinding.getName(); String type = rodBinding.getTribbleType(); - RMDTriplet.RMDStorageType storageType = null; + RMDTriplet.RMDStorageType storageType; if(rodBinding.getTags().getValue("storage") != null) storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,rodBinding.getTags().getValue("storage")); else if(fileName.toLowerCase().endsWith("stdin")) @@ -184,4 +187,157 @@ public class ListFileUtils { return "/dev/stdin"; return argument; } + + /** + * Returns a new set of values, containing a final set of values expanded from values + *

+ * Each element E of values can either be a literal string or a file ending in .list. + * For each E ending in .list we try to read a file named E from disk, and if possible + * all lines from that file are expanded into unique values. + * + * @param values Original values + * @return entries from values or the files listed in values + */ + public static Set unpackSet(Collection values) { + if (values == null) + throw new NullPointerException("values cannot be null"); + Set unpackedValues = new LinkedHashSet(); + // Let's first go through the list and see if we were given any files. + // We'll add every entry in the file to our set, and treat the entries as + // if they had been specified on the command line. + for (String value : values) { + File file = new File(value); + if (value.toLowerCase().endsWith(".list") && file.exists()) { + try { + unpackedValues.addAll(new XReadLines(file, true, LIST_FILE_COMMENT_START).readLines()); + } catch (IOException e) { + throw new UserException.CouldNotReadInputFile(file, e); + } + } else { + unpackedValues.add(value); + } + } + return unpackedValues; + } + + /** + * Returns a new set of values including only values listed by filters + *

+ * Values are matched exactly as given; this method does not read them from disk. + * Callers that accept .list files of values should expand them with unpackSet before + * calling this method. + *

+ * Filters may also be a file of filters. + * + * @param values Values or files with values + * @param filters Filters or files with filters + * @param exactMatch If true match filters exactly, otherwise use as both exact and regular expressions + * @return entries from values or the files listed in values, filtered by filters + */ + public static Set includeMatching(Collection values, Collection filters, boolean exactMatch) { + return includeMatching(values, IDENTITY_STRING_CONVERTER, filters, exactMatch); + } + + /** + * Converts a type T to a String representation. + * + * @param Type to convert to a String. + */ + public static interface StringConverter { + String convert(T value); + } + + /** + * Returns a new set of values including only values matching filters + *

+ * Filters may also be a file of filters. + *

+ * The converter should convert T to a unique String for each value in the set. + * + * @param values Values or files with values + * @param converter Converts values to strings + * @param filters Filters or files with filters + * @param exactMatch If true match filters exactly, otherwise use as both exact and regular expressions + * @return entries from values including only values matching filters + */ + public static Set includeMatching(Collection values, StringConverter converter, Collection filters, boolean exactMatch) { + if (values == null) + throw new NullPointerException("values cannot be null"); + if (converter == null) + throw new NullPointerException("converter cannot be null"); + if (filters == null) + throw new NullPointerException("filters cannot be null"); + + Set unpackedFilters = unpackSet(filters); + Set filteredValues = new LinkedHashSet(); + Collection patterns = null; + if (!exactMatch) + patterns = compilePatterns(unpackedFilters); + for (T value : values) { + String converted = converter.convert(value); + if (unpackedFilters.contains(converted)) { + filteredValues.add(value); + } else if (!exactMatch) { + for (Pattern pattern : patterns) + if (pattern.matcher(converted).find()) + filteredValues.add(value); + } + } + return filteredValues; + } + + /** + * Returns a new set of values excluding any values matching filters. + *

+ * Filters may also be a file of filters. + *

+ * The converter should convert T to a unique String for each value in the set. + * + * @param values Values or files with values + * @param converter Converts values to strings + * @param filters Filters or files with filters + * @param exactMatch If true match filters exactly, otherwise use as both exact and regular expressions + * @return entries from values excluding any values matching filters + */ + public static Set excludeMatching(Collection values, StringConverter converter, Collection filters, boolean exactMatch) { + if (values == null) + throw new NullPointerException("values cannot be null"); + if (converter == null) + throw new NullPointerException("converter cannot be null"); + if (filters == null) + throw new NullPointerException("filters cannot be null"); + + Set unpackedFilters = unpackSet(filters); + Set filteredValues = new LinkedHashSet(); + filteredValues.addAll(values); + Collection patterns = null; + if (!exactMatch) + patterns = compilePatterns(unpackedFilters); + for (T value : values) { + String converted = converter.convert(value); + if (unpackedFilters.contains(converted)) { + filteredValues.remove(value); + } else if (!exactMatch) { + for (Pattern pattern : patterns) + if (pattern.matcher(converted).find()) + filteredValues.remove(value); + } + } + return filteredValues; + } + + private static Collection compilePatterns(Collection filters) { + Collection patterns = new ArrayList(); + for (String filter: filters) { + patterns.add(Pattern.compile(filter)); + } + return patterns; + } + + protected static final StringConverter IDENTITY_STRING_CONVERTER = new StringConverter() { + @Override + public String convert(String value) { + return value; + } + }; } diff --git a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java index 49e9ddf52..b7fc1bdab 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java +++
b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,15 +12,14 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.utils.text; @@ -48,75 +47,92 @@ import java.util.List; * For the love of god, please use this system for reading lines in a file. 
*/ public class XReadLines implements Iterator, Iterable { - private BufferedReader in; // The stream we're reading from - private String nextline = null; // Return value of next call to next() - private boolean trimWhitespace = true; + private final BufferedReader in; // The stream we're reading from + private String nextLine = null; // Return value of next call to next() + private final boolean trimWhitespace; + private final String commentPrefix; + + public XReadLines(final File filename) throws FileNotFoundException { + this(new FileReader(filename), true, null); + } + + public XReadLines(final File filename, final boolean trimWhitespace) throws FileNotFoundException { + this(new FileReader(filename), trimWhitespace, null); + } /** * Creates a new xReadLines object to read lines from filename * - * @param filename - * @throws FileNotFoundException + * @param filename file name + * @param trimWhitespace trim whitespace + * @param commentPrefix prefix for comments or null if no prefix is set + * @throws FileNotFoundException when the file is not found */ - public XReadLines(final File filename, final boolean trimWhitespace) throws FileNotFoundException { - this(new FileReader(filename), trimWhitespace); + public XReadLines(final File filename, final boolean trimWhitespace, final String commentPrefix) throws FileNotFoundException { + this(new FileReader(filename), trimWhitespace, commentPrefix); } - public XReadLines(final File filename) throws FileNotFoundException { - this(filename, true); + public XReadLines(final InputStream inputStream) throws FileNotFoundException { + this(new InputStreamReader(inputStream), true, null); } - /** - * Creates a new xReadLines object to read lines from fileReader - * - * @param fileReader - * @throws FileNotFoundException - */ - public XReadLines(final FileReader fileReader, final boolean trimWhitespace) throws FileNotFoundException { - this(new BufferedReader(fileReader), trimWhitespace); - } - - public XReadLines(final 
FileReader fileReader) throws FileNotFoundException { - this(fileReader, true); + public XReadLines(final InputStream inputStream, final boolean trimWhitespace) { + this(new InputStreamReader(inputStream), trimWhitespace, null); } /** * Creates a new xReadLines object to read lines from an input stream * - * @param inputStream + * @param inputStream input stream + * @param trimWhitespace trim whitespace + * @param commentPrefix prefix for comments or null if no prefix is set */ - public XReadLines(final InputStream inputStream, final boolean trimWhitespace) { - this(new BufferedReader(new InputStreamReader(inputStream)), trimWhitespace); - } - - public XReadLines(final InputStream inputStream) throws FileNotFoundException { - this(inputStream, true); + public XReadLines(final InputStream inputStream, final boolean trimWhitespace, final String commentPrefix) { + this(new InputStreamReader(inputStream), trimWhitespace, commentPrefix); } /** - * Creates a new xReadLines object to read lines from an bufferedReader + * Creates a new xReadLines object to read lines from a reader * - * @param reader + * @param reader reader + */ + public XReadLines(final Reader reader) { + this(reader, true, null); + } + + /** + * Creates a new xReadLines object to read lines from a reader + * + * @param reader reader + * @param trimWhitespace trim whitespace */ public XReadLines(final Reader reader, final boolean trimWhitespace) { + this(reader, trimWhitespace, null); + } + + /** + * Creates a new xReadLines object to read lines from a reader, wrapping it in a BufferedReader if it is not one already + * + * @param reader reader + * @param trimWhitespace trim whitespace + * @param commentPrefix prefix for comments or null if no prefix is set + */ + public XReadLines(final Reader reader, final boolean trimWhitespace, final String commentPrefix) { + this.in = (reader instanceof BufferedReader) ?
(BufferedReader)reader : new BufferedReader(reader); + this.trimWhitespace = trimWhitespace; + this.commentPrefix = commentPrefix; try { - this.in = new BufferedReader(reader); - nextline = readNextLine(); - this.trimWhitespace = trimWhitespace; + this.nextLine = readNextLine(); } catch(IOException e) { throw new IllegalArgumentException(e); } } - public XReadLines(final Reader reader) { - this(reader, true); - } - /** * Reads all of the lines in the file, and returns them as a list of strings * - * @return + * @return all of the lines in the file. */ public List readLines() { List lines = new LinkedList(); @@ -128,38 +144,48 @@ public class XReadLines implements Iterator, Iterable { /** * I'm an iterator too... - * @return + * @return an iterator */ public Iterator iterator() { return this; } public boolean hasNext() { - return nextline != null; + return this.nextLine != null; } /** - * Actually reads the next line from the stream, not accessible publically - * @return + * Actually reads the next line from the stream, not accessible publicly + * @return the next line or null + * @throws IOException if an error occurs */ private String readNextLine() throws IOException { - String nextline = in.readLine(); // Read another line - if (nextline != null && trimWhitespace ) - nextline = nextline.trim(); - return nextline; + String nextLine; + while ((nextLine = this.in.readLine()) != null) { + if (this.trimWhitespace) { + nextLine = nextLine.trim(); + if (nextLine.length() == 0) + continue; + } + if (this.commentPrefix != null) + if (nextLine.startsWith(this.commentPrefix)) + continue; + break; + } + return nextLine; } /** - * Returns the next line (minus whitespace) - * @return + * Returns the next line (optionally minus whitespace) + * @return the next line */ public String next() { try { - String result = nextline; - nextline = readNextLine(); + String result = this.nextLine; + this.nextLine = readNextLine(); // If we haven't reached EOF yet - if (nextline == null) { 
+ if (this.nextLine == null) { in.close(); // And close on EOF } diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index ec0db12d3..5759204cf 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -42,13 +42,13 @@ public class GATKReportUnitTest extends BaseTest { Assert.assertEquals(report.getTables().size(), 5); GATKReportTable countVariants = report.getTable("CountVariants"); - Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all"); + Object countVariantsPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "all"); Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520"); Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0"); Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06); GATKReportTable validationReport = report.getTable("ValidationReport"); - Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel"); + Object validationReportPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "novel"); Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN); } @@ -79,6 +79,49 @@ public class GATKReportUnitTest extends BaseTest { Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'"); } + private GATKReportTable makeBasicTable() { + GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value"); + GATKReportTable table = report.getTable("TableName"); + report.addRow("foo.1", "hello"); + report.addRow("foo.2", "world"); + return table; + } + + @Test + public void testDottedSampleName() { + GATKReportTable table = makeBasicTable(); + Object pk; + + pk = 
table.getPrimaryKeyByData("foo.1"); + Assert.assertEquals(table.get(pk, "value"), "hello"); + + pk = table.getPrimaryKeyByData("foo.2"); + Assert.assertEquals(table.get(pk, "value"), "world"); + } + + @Test + public void testFindPrimaryKeyByData() { + GATKReportTable table = makeBasicTable(); + Assert.assertNotNull(table.findPrimaryKeyByData("foo.1")); + Assert.assertNotNull(table.findPrimaryKeyByData("foo.1", "hello")); + Assert.assertNotNull(table.findPrimaryKeyByData("foo.2")); + Assert.assertNotNull(table.findPrimaryKeyByData("foo.2", "world")); + Assert.assertNull(table.findPrimaryKeyByData("list", "longer", "than", "column", "count")); + Assert.assertNull(table.findPrimaryKeyByData("short")); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testEmptyFindPrimaryKeyByData() { + GATKReportTable table = makeBasicTable(); + table.findPrimaryKeyByData(); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testNullFindPrimaryKeyByData() { + GATKReportTable table = makeBasicTable(); + table.findPrimaryKeyByData((Object[]) null); + } + @Test public void testSimpleGATKReport() { // Create a new simple GATK report named "TableName" with columns: Roger, is, and Awesome diff --git a/public/java/test/org/broadinstitute/sting/utils/R/RUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/R/RUtilsUnitTest.java new file mode 100644 index 000000000..23bf074e2 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/R/RUtilsUnitTest.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.R; + +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class RUtilsUnitTest { + @DataProvider(name = "stringLists") + public Object[][] getStringLists() { + return new Object[][] { + new Object[] { null, "NA" }, + new Object[] { Collections.EMPTY_LIST, "c()" }, + new Object[] { Arrays.asList("1", "2", "3"), "c('1','2','3')" } + }; + } + + @Test(dataProvider = "stringLists") + public void testToStringList(List actual, String expected) { + Assert.assertEquals(RUtils.toStringList(actual), expected); + } + + @DataProvider(name = "numberLists") + public Object[][] getNumberLists() { + return new Object[][] { + new Object[] { null, "NA" }, + new Object[] { Collections.EMPTY_LIST, "c()" }, + new Object[] { Arrays.asList(1, 2, 3), "c(1,2,3)" }, + new Object[] { Arrays.asList(1D, 2D, 3D), "c(1.0,2.0,3.0)" } + }; + } + + @Test(dataProvider = "numberLists") + public void testToNumberList(List actual, String expected) { + Assert.assertEquals(RUtils.toNumberList(actual), expected); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java 
b/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java index f0b1de6fe..f21b4bced 100644 --- a/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java @@ -28,17 +28,14 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.testng.Assert; -import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.List; - +import java.util.*; /** * Tests selected functionality in the CommandLineExecutable class @@ -74,6 +71,76 @@ public class ListFileUtilsUnitTest extends BaseTest { performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking); } + @Test + public void testUnpackSet() throws Exception { + Set expected = new HashSet(Arrays.asList("public/testdata/exampleBAM.bam")); + Set actual; + + actual = ListFileUtils.unpackSet(Arrays.asList("public/testdata/exampleBAM.bam")); + Assert.assertEquals(actual, expected); + + File tempListFile = createTempListFile("testUnpackSet", + "#", + "public/testdata/exampleBAM.bam", + "#public/testdata/foo.bam", + " # public/testdata/bar.bam" + ); + actual = ListFileUtils.unpackSet(Arrays.asList(tempListFile.getAbsolutePath())); + Assert.assertEquals(actual, expected); + } + + @DataProvider(name="includeMatchingTests") + public Object[][] getIncludeMatchingTests() { + return new Object[][] { + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a"), true, asSet("a") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a"), 
false, asSet("a", "ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("b"), true, Collections.EMPTY_SET }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("b"), false, asSet("ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), true, asSet("a") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), false, asSet("a", "ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), true, asSet("a", "ab") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), false, asSet("a", "ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), true, Collections.EMPTY_SET }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), false, asSet("ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*"), true, Collections.EMPTY_SET }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*"), false, asSet("a", "ab", "abc") } + }; + } + + @Test(dataProvider = "includeMatchingTests") + public void testIncludeMatching(Set values, Collection filters, boolean exactMatch, Set expected) { + Set actual = ListFileUtils.includeMatching(values, ListFileUtils.IDENTITY_STRING_CONVERTER, filters, exactMatch); + Assert.assertEquals(actual, expected); + } + + @DataProvider(name="excludeMatchingTests") + public Object[][] getExcludeMatchingTests() { + return new Object[][] { + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a"), true, asSet("ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a"), false, Collections.EMPTY_SET }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("b"), true, asSet("a", "ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("b"), false, asSet("a") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), true, asSet("ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), false, Collections.EMPTY_SET }, + new Object[] { 
asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), true, asSet("abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), false, Collections.EMPTY_SET }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), true, asSet("a", "ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), false, asSet("a") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*"), true, asSet("a", "ab", "abc") }, + new Object[] { asSet("a", "ab", "abc"), Arrays.asList(".*"), false, Collections.EMPTY_SET } + }; + } + + @Test(dataProvider = "excludeMatchingTests") + public void testExcludeMatching(Set values, Collection filters, boolean exactMatch, Set expected) { + Set actual = ListFileUtils.excludeMatching(values, ListFileUtils.IDENTITY_STRING_CONVERTER, filters, exactMatch); + Assert.assertEquals(actual, expected); + } + + private static Set asSet(T... args){ + return new HashSet(Arrays.asList(args)); + } + private File createTempListFile( String tempFilePrefix, String... 
lines ) throws Exception { File tempListFile = File.createTempFile(tempFilePrefix, ".list"); tempListFile.deleteOnExit(); diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleReadFilter.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleReadFilter.scala new file mode 100644 index 000000000..89f2f55fb --- /dev/null +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleReadFilter.scala @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.queue.qscripts.examples + +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ + +/** + * Script used for testing a read filter: runs UnifiedGenotyper with BadMate mixed in + */ +class ExampleReadFilter extends QScript { + @Input(doc="The reference file for the bam files.", shortName="R") + var referenceFile: File = _ + + @Input(doc="Bam file to genotype.", shortName="I") + var bamFile: File = _ + + def script() { + val genotyper = new UnifiedGenotyper with BadMate + genotyper.reference_sequence = referenceFile + genotyper.memoryLimit = 2 + genotyper.input_file :+= bamFile + add(genotyper) + } +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala index 085e0b008..2f604a809 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala @@ -49,7 +49,6 @@ case class GATKIntervals(reference: File, intervals: Seq[String]) { else IntervalUtils.parseIntervalArguments(parser, intervals) Collections.sort(parsedLocs) - Collections.unmodifiableList(parsedLocs) val mergedLocs = IntervalUtils.mergeIntervalLocations(parsedLocs, IntervalMergingRule.OVERLAPPING_ONLY) Collections.unmodifiableList(mergedLocs) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index 70046c913..8ac711f25 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -32,6 +32,8 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor * Merges a vcf text file.
*/ class VcfGatherFunction extends CombineVariants with GatherFunction { + this.assumeIdenticalSamples = true + this.suppressCommandLineHeader = true private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] @@ -43,7 +45,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction { this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } this.out = this.originalOutput - this.assumeIdenticalSamples = true // NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index 22f4f6225..9d51b01a0 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging { println(" value (min,target,max) table key metric") for (validation <- evalSpec.validations) { val table = report.getTable(validation.table) - val key = table.getPrimaryKeyByData(validation.key) + val key = table.getPrimaryKeyByData(validation.table +: validation.key.split('.') : _*) val value = String.valueOf(table.get(key, validation.metric)) val inRange = if (value == null) false else validation.inRange(value) val flag = if (!inRange) "*" else " " diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala new file mode 100644 index 000000000..7e5e9a93e --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2012, The Broad Institute 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.pipeline.examples + +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +import org.testng.annotations.Test +import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.BaseTest + +class ExampleReadFilterPipelineTest { + @Test + def testExampleReadFilter() { + val spec = new PipelineTestSpec + spec.name = "examplereadfilter" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleReadFilter.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -I " + BaseTest.testDir + "exampleBAM.bam").mkString + PipelineTest.executeTest(spec) + } +}