diff --git a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 6ddfb084d..ed22118e1 100644 --- a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -134,6 +134,14 @@ public abstract class CommandLineProgram { */ protected String getArgumentSourceName( Class source ) { return source.toString(); } + /** + * Sets the command-line parsing engine. Necessary for unit testing purposes. + * @param parser the new command-line parsing engine + */ + public void setParser( ParsingEngine parser ) { + this.parser = parser; + } + /** * this is the function that the inheriting class can expect to have called * when all the argument processing is done diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 5a1f3378e..b001b62d8 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -71,6 +71,11 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ private final Collection argumentSources = new ArrayList(); + /** + * Lines starting with this String in .list files are considered comments. + */ + public static final String LIST_FILE_COMMENT_START = "#"; + /** * this is the function that the inheriting class can expect to have called * when the command line system has initialized. @@ -196,20 +201,25 @@ public abstract class CommandLineExecutable extends CommandLineProgram { /** * Unpack the bam files to be processed, given a list of files. That list of files can - * itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists) + * itself contain entries which are lists of other files to be read (note: you cannot have lists + * of lists of lists). Lines in .list files containing only whitespace or which begin with + * LIST_FILE_COMMENT_START are ignored. * * @param argCollection the command-line arguments from which to extract the BAM file list. * @return a flattened list of the bam files provided */ - private List unpackBAMFileList(GATKArgumentCollection argCollection) { + protected List unpackBAMFileList(GATKArgumentCollection argCollection) { List unpackedReads = new ArrayList(); for( String inputFileName: argCollection.samFiles ) { Tags inputFileNameTags = parser.getTags(inputFileName); inputFileName = expandFileName(inputFileName); if (inputFileName.toLowerCase().endsWith(".list") ) { try { - for(String fileName : new XReadLines(new File(inputFileName))) - unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); + for ( String fileName : new XReadLines(new File(inputFileName), true) ) { + if ( fileName.length() > 0 && ! fileName.startsWith(LIST_FILE_COMMENT_START) ) { + unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); + } + } } catch( FileNotFoundException ex ) { throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex); diff --git a/java/test/org/broadinstitute/sting/gatk/CommandLineExecutableUnitTest.java b/java/test/org/broadinstitute/sting/gatk/CommandLineExecutableUnitTest.java new file mode 100644 index 000000000..23828056f --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/CommandLineExecutableUnitTest.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; + + +/** + * Tests selected functionality in the CommandLineExecutable class + */ +public class CommandLineExecutableUnitTest extends BaseTest { + + @Test + public void testIgnoreBlankLinesInBAMListFiles() throws Exception { + File tempListFile = createTempListFile("testIgnoreBlankLines", + "", + "testdata/exampleBAM.bam", + " " + ); + + List expectedBAMFileListAfterUnpacking = new ArrayList(); + expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + + performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking); + } + + @Test + public void testCommentSupportInBAMListFiles() throws Exception { + File tempListFile = createTempListFile("testCommentSupport", + "#", + "testdata/exampleBAM.bam", + "#testdata/foo.bam", + " # testdata/bar.bam" + ); + + List expectedBAMFileListAfterUnpacking = new ArrayList(); + expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + + performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking); + } + + private File createTempListFile( String tempFilePrefix, String... lines ) throws Exception { + File tempListFile = File.createTempFile(tempFilePrefix, ".list"); + tempListFile.deleteOnExit(); + + PrintWriter out = new PrintWriter(tempListFile); + for ( String line : lines ) { + out.println(line); + } + out.close(); + + return tempListFile; + } + + private void performBAMListFileUnpackingTest( File tempListFile, List expectedUnpackedFileList ) throws Exception { + List bamFiles = new ArrayList(); + bamFiles.add(tempListFile.getAbsolutePath()); + + GATKArgumentCollection argCollection = new GATKArgumentCollection(); + argCollection.samFiles = bamFiles; + + CommandLineGATK testInstance = new CommandLineGATK(); + testInstance.setParser(new ParsingEngine(testInstance)); + + List unpackedBAMFileList = testInstance.unpackBAMFileList(argCollection); + + Assert.assertEquals(unpackedBAMFileList.size(), expectedUnpackedFileList.size(), + "Unpacked BAM file list contains extraneous lines"); + Assert.assertEquals(unpackedBAMFileList, expectedUnpackedFileList, + "Unpacked BAM file list does not contain correct BAM file names"); + } +}