Fix for bug GSA-441: BAM file list with blank lines gives a confusing error message.

Lines containing only whitespace in .list files are now ignored.
Also added support for comments in .list files: lines whose first
non-whitespace character is '#' are now also ignored.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5550 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
droazen 2011-04-01 15:04:35 +00:00
parent 4f8411f4b5
commit 0927b7c297
3 changed files with 128 additions and 4 deletions

View File

@ -134,6 +134,14 @@ public abstract class CommandLineProgram {
*/
protected String getArgumentSourceName( Class source ) { return source.toString(); }
/**
* Replaces the command-line parsing engine held by this program.
* Provided so that unit tests can install an engine of their own.
* @param parser the parsing engine to use from now on
*/
public void setParser( final ParsingEngine parser ) {
    this.parser = parser;
}
/**
* this is the function that the inheriting class can expect to have called
* when all the argument processing is done

View File

@ -71,6 +71,11 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
*/
private final Collection<Object> argumentSources = new ArrayList<Object>();
/**
* Marker that introduces a comment line in a .list file. While unpacking a
* .list file, any line that starts with this String is skipped instead of
* being treated as a file name.
* NOTE(review): the accompanying change description says the marker is
* matched against the first non-whitespace character of the line; that
* relies on the line reader stripping surrounding whitespace first -- confirm.
*/
public static final String LIST_FILE_COMMENT_START = "#";
/**
* this is the function that the inheriting class can expect to have called
* when the command line system has initialized.
@ -196,20 +201,25 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
/**
* Unpack the bam files to be processed, given a list of files. That list of files can
* itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists)
* itself contain entries which are lists of other files to be read (note: you cannot have lists
* of lists of lists). Lines in .list files containing only whitespace or which begin with
* LIST_FILE_COMMENT_START are ignored.
*
* @param argCollection the command-line arguments from which to extract the BAM file list.
* @return a flattened list of the bam files provided
*/
private List<SAMReaderID> unpackBAMFileList(GATKArgumentCollection argCollection) {
protected List<SAMReaderID> unpackBAMFileList(GATKArgumentCollection argCollection) {
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
for( String inputFileName: argCollection.samFiles ) {
Tags inputFileNameTags = parser.getTags(inputFileName);
inputFileName = expandFileName(inputFileName);
if (inputFileName.toLowerCase().endsWith(".list") ) {
try {
for(String fileName : new XReadLines(new File(inputFileName)))
unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName)));
for ( String fileName : new XReadLines(new File(inputFileName), true) ) {
if ( fileName.length() > 0 && ! fileName.startsWith(LIST_FILE_COMMENT_START) ) {
unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName)));
}
}
}
catch( FileNotFoundException ex ) {
throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex);

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
/**
 * Tests selected functionality in the CommandLineExecutable class.
 *
 * The tests here write small temporary .list files and check that unpacking
 * them yields only the real BAM entries: blank / whitespace-only lines and
 * lines commented out with '#' must not show up in the result.
 */
public class CommandLineExecutableUnitTest extends BaseTest {

    /** The only genuine BAM entry written into the temporary list files. */
    private static final String EXAMPLE_BAM = "testdata/exampleBAM.bam";

    /**
     * Empty and whitespace-only lines in a .list file must be skipped.
     */
    @Test
    public void testIgnoreBlankLinesInBAMListFiles() throws Exception {
        File tempListFile = createTempListFile("testIgnoreBlankLines",
                                               "",
                                               EXAMPLE_BAM,
                                               " "
                                              );

        performBAMListFileUnpackingTest(tempListFile, expectedSingleExampleBAM());
    }

    /**
     * Lines whose first non-whitespace character is '#' must be skipped,
     * whether the '#' stands alone, prefixes a path, or follows leading whitespace.
     */
    @Test
    public void testCommentSupportInBAMListFiles() throws Exception {
        File tempListFile = createTempListFile("testCommentSupport",
                                               "#",
                                               EXAMPLE_BAM,
                                               "#testdata/foo.bam",
                                               " # testdata/bar.bam"
                                              );

        performBAMListFileUnpackingTest(tempListFile, expectedSingleExampleBAM());
    }

    /**
     * Builds the expected unpacking result shared by the tests: a single
     * reader ID for the example BAM with an empty tag set.
     *
     * @return a new list holding exactly one SAMReaderID
     */
    private static List<SAMReaderID> expectedSingleExampleBAM() {
        List<SAMReaderID> expected = new ArrayList<SAMReaderID>();
        expected.add(new SAMReaderID(new File(EXAMPLE_BAM), new Tags()));
        return expected;
    }

    /**
     * Writes the given lines, one per line, to a new temporary file with a
     * ".list" extension. The file is registered for deletion on JVM exit.
     *
     * @param tempFilePrefix prefix for the temporary file name
     * @param lines          the lines to write, in order
     * @return the temporary list file
     * @throws Exception if the temporary file cannot be created or opened
     */
    private static File createTempListFile( String tempFilePrefix, String... lines ) throws Exception {
        File tempListFile = File.createTempFile(tempFilePrefix, ".list");
        tempListFile.deleteOnExit();

        PrintWriter out = new PrintWriter(tempListFile);
        try {
            for ( String line : lines ) {
                out.println(line);
            }
        }
        finally {
            // Always release the file handle, even if writing fails part-way.
            out.close();
        }

        // PrintWriter never throws on write failures; it only records them.
        // Fail loudly here rather than run the test against a truncated fixture.
        Assert.assertFalse(out.checkError(),
                           "Failed to write temporary list file " + tempListFile.getAbsolutePath());

        return tempListFile;
    }

    /**
     * Feeds the given list file to a CommandLineGATK instance as its only
     * input and asserts that unpacking it produces the expected reader IDs.
     *
     * @param tempListFile             the .list file to unpack
     * @param expectedUnpackedFileList the reader IDs unpacking should produce
     * @throws Exception if unpacking fails
     */
    private void performBAMListFileUnpackingTest( File tempListFile, List<SAMReaderID> expectedUnpackedFileList ) throws Exception {
        List<String> bamFiles = new ArrayList<String>();
        bamFiles.add(tempListFile.getAbsolutePath());

        GATKArgumentCollection argCollection = new GATKArgumentCollection();
        argCollection.samFiles = bamFiles;

        // The parser is normally set up during command-line start-up; install
        // one by hand so that tag lookup works during unpacking.
        CommandLineGATK testInstance = new CommandLineGATK();
        testInstance.setParser(new ParsingEngine(testInstance));

        List<SAMReaderID> unpackedBAMFileList = testInstance.unpackBAMFileList(argCollection);

        Assert.assertEquals(unpackedBAMFileList.size(), expectedUnpackedFileList.size(),
                            "Unpacked BAM file list contains extraneous lines");
        Assert.assertEquals(unpackedBAMFileList, expectedUnpackedFileList,
                            "Unpacked BAM file list does not contain correct BAM file names");
    }
}