From f275be69681bcf87147522b8a929a7ae928a9698 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 10 May 2011 12:56:47 +0000 Subject: [PATCH] A 'fat shard' finder. Cranks through the indices of a BAM file or list of BAM files looking for outliers (outliers right now are defined naively as shards whose sizes are more than 5 stddevs away from the mean). Runs in 13 minutes per chromosome on 707 low pass whole genome BAMs -- not great, but much faster than running UG on the same region to discover anomalies. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5782 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/CommandLineExecutable.java | 110 +---------- .../gatk/datasources/reads/FilePointer.java | 10 +- .../gatk/datasources/reads/SAMDataSource.java | 2 +- .../reads/utilities/FindLargeShards.java | 187 ++++++++++++++++++ .../sting/utils/text/ListFileUtils.java | 152 ++++++++++++++ .../text/ListFileUtilsUnitTest.java} | 10 +- 6 files changed, 356 insertions(+), 115 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java create mode 100644 java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java rename java/test/org/broadinstitute/sting/{gatk/CommandLineExecutableUnitTest.java => utils/text/ListFileUtilsUnitTest.java} (93%) diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 5fcd3af6a..fd7e749c3 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -47,6 +47,7 @@ import java.util.*; import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.text.XReadLines; /** @@ -72,11 +73,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ private final Collection argumentSources = new ArrayList(); - /** - * Lines starting with this String in .list files are considered comments. - */ - public static final String LIST_FILE_COMMENT_START = "#"; - /** * this is the function that the inheriting class can expect to have called * when the command line system has initialized. @@ -93,8 +89,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { engine.setArguments(getArgumentCollection()); // File lists can require a bit of additional expansion. Set these explicitly by the engine. - engine.setSAMFileIDs(unpackBAMFileList(getArgumentCollection())); - engine.setReferenceMetaDataFiles(unpackRODBindings(getArgumentCollection())); + engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser)); + engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser)); engine.setWalker(walker); walker.setToolkit(engine); @@ -200,104 +196,4 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return engine.getWalkerName((Class)argumentSource); } - /** - * Unpack the bam files to be processed, given a list of files. That list of files can - * itself contain entries which are lists of other files to be read (note: you cannot have lists - * of lists of lists). Lines in .list files containing only whitespace or which begin with - * LIST_FILE_COMMENT_START are ignored. - * - * @param argCollection the command-line arguments from which to extract the BAM file list. - * @return a flattened list of the bam files provided - */ - protected List unpackBAMFileList(GATKArgumentCollection argCollection) { - List unpackedReads = new ArrayList(); - for( String inputFileName: argCollection.samFiles ) { - Tags inputFileNameTags = parser.getTags(inputFileName); - inputFileName = expandFileName(inputFileName); - if (inputFileName.toLowerCase().endsWith(".list") ) { - try { - for ( String fileName : new XReadLines(new File(inputFileName), true) ) { - if ( fileName.length() > 0 && ! fileName.startsWith(LIST_FILE_COMMENT_START) ) { - unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); - } - } - } - catch( FileNotFoundException ex ) { - throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex); - } - } - else if(inputFileName.toLowerCase().endsWith(".bam")) { - unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); - } - else if(inputFileName.endsWith("stdin")) { - unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); - } - else { - throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " + - "with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " + - "of BAM files is in the correct format, update the extension, and try again.",inputFileName)); - } - } - return unpackedReads; - } - /** - * Convert command-line argument representation of ROD bindings to something more easily understandable by the engine. - * @param argCollection input arguments to the GATK. - * @return a list of expanded, bound RODs. - */ - private Collection unpackRODBindings(GATKArgumentCollection argCollection) { - Collection rodBindings = new ArrayList(); - - for (String fileName: argCollection.RODBindings) { - final Tags tags = parser.getTags(fileName); - fileName = expandFileName(fileName); - - List positionalTags = tags.getPositionalTags(); - if(positionalTags.size() != 2) - throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " + - "Please use the following syntax when providing reference-ordered " + - "data: -B:, ."); - // Assume that if tags are present, those tags are name and type. - // Name is always first, followed by type. - String name = positionalTags.get(0); - String type = positionalTags.get(1); - - RMDStorageType storageType = null; - if(tags.getValue("storage") != null) - storageType = Enum.valueOf(RMDStorageType.class,tags.getValue("storage")); - else if(fileName.toLowerCase().endsWith("stdin")) - storageType = RMDStorageType.STREAM; - else - storageType = RMDStorageType.FILE; - - rodBindings.add(new RMDTriplet(name,type,fileName,storageType,tags)); - } - - if (argCollection.DBSNPFile != null) { - if(argCollection.DBSNPFile.toLowerCase().contains("vcf")) - throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf ."); - - final Tags tags = parser.getTags(argCollection.DBSNPFile); - String fileName = expandFileName(argCollection.DBSNPFile); - RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE; - - rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType,tags)); - } - - return rodBindings; - } - - /** - * Expand any special characters that appear in the filename. Right now, '-' is expanded to - * '/dev/stdin' only, but in the future, special characters like '~' and '*' that are passed - * directly to the command line in some circumstances could be expanded as well. Be careful - * when adding UNIX-isms. - * @param argument the text appearing on the command-line. - * @return An expanded string suitable for opening by Java/UNIX file handling utilities. - */ - private String expandFileName(String argument) { - if(argument.trim().equals("-")) - return "/dev/stdin"; - return argument; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java index 1fa02f35f..c014c1995 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java @@ -46,7 +46,7 @@ import java.util.TreeMap; /** * Represents a small section of a BAM file, and every associated interval. */ -class FilePointer { +public class FilePointer { protected final SortedMap fileSpans = new TreeMap(); protected final BAMOverlap overlap; protected final List locations; @@ -72,6 +72,14 @@ class FilePointer { this.isRegionUnmapped = false; } + /** + * Returns an immutable variant of the list of locations. + * @return + */ + public List getLocations() { + return Collections.unmodifiableList(locations); + } + @Override public boolean equals(final Object other) { if(!(other instanceof FilePointer)) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 7a4c632c7..b27b6e61b 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -150,7 +150,7 @@ public class SAMDataSource { new ArrayList(), false, false, - false); + true); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java new file mode 100644 index 000000000..d3750ef28 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads.utilities; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMSequenceRecord; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.gatk.datasources.reads.FilePointer; +import org.broadinstitute.sting.gatk.datasources.reads.LowMemoryIntervalSharder; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.text.ListFileUtils; + +import java.io.File; +import java.io.IOException; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; + +/** + * Traverses a region in a dataset looking for outliers. + */ +public class FindLargeShards extends CommandLineProgram { + private static Logger logger = Logger.getLogger(FindLargeShards.class); + + @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) + public List samFiles = new ArrayList(); + + @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) + public File referenceFile = null; + + @Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.",required=false) + public List intervals = null; + + /** + * The square of the sum of all uncompressed data. Based on the BAM spec, the size of this could be + * up to (2^64)^2. + */ + private BigInteger sumOfSquares = BigInteger.valueOf(0); + + /** + * The running sum of all uncompressed data. Based on the BAM spec, the BAM must be less than Long.MAX_LONG + * when compressed -- in other words, the sum of the sizes of all BGZF blocks must be < 2^64. + */ + private BigInteger sum = BigInteger.valueOf(0); + + /** + * The number of shards viewed. + */ + private long numberOfShards; + + + @Override + public int execute() throws IOException { + // initialize reference + IndexedFastaSequenceFile refReader = new IndexedFastaSequenceFile(referenceFile); + GenomeLocParser genomeLocParser = new GenomeLocParser(refReader); + + // initialize reads + List bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser); + SAMDataSource dataSource = new SAMDataSource(bamReaders,genomeLocParser); + + // intervals + GenomeLocSortedSet intervalSortedSet = null; + if(intervals != null) + intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL); + else { + intervalSortedSet = new GenomeLocSortedSet(genomeLocParser); + for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences()) + intervalSortedSet.add(genomeLocParser.createGenomeLoc(entry.getSequenceName(),1,entry.getSequenceLength())); + } + + logger.info(String.format("Calculating mean and variance: Contig\tRegion.Start\tRegion.Stop\tSize")); + + LowMemoryIntervalSharder sharder = new LowMemoryIntervalSharder(dataSource,intervalSortedSet); + while(sharder.hasNext()) { + FilePointer filePointer = sharder.next(); + + // Size of the file pointer. + final long size = filePointer.size(); + + BigInteger bigSize = BigInteger.valueOf(size); + sumOfSquares = sumOfSquares.add(bigSize.pow(2)); + sum = sum.add(bigSize); + numberOfShards++; + + if(numberOfShards % 1000 == 0) { + GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser); + logger.info(String.format("Calculating mean and variance: %s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size)); + } + + } + + // Print out the stddev: (sum(x^2) - (1/N)*sum(x)^2)/N + long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue(); + long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue())); + logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size = %d%n",numberOfShards,mean,stddev)); + + // Crank through the shards again, this time reporting on the shards significantly larger than the mean. + long threshold = mean + stddev*5; + logger.warn(String.format("Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize")); + sharder = new LowMemoryIntervalSharder(dataSource,intervalSortedSet); + while(sharder.hasNext()) { + FilePointer filePointer = sharder.next(); + + // Bounding region. + GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser); + + // Size of the file pointer. + final long size = filePointer.size(); + + numberOfShards++; + + if(filePointer.size() <= threshold) { + if(numberOfShards % 1000 == 0) + logger.info(String.format("%s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size)); + continue; + } + + logger.warn(String.format("FOUND LARGE SHARD: %s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size)); + } + + return 0; + } + + private GenomeLoc getBoundingRegion(final FilePointer filePointer, final GenomeLocParser genomeLocParser) { + List regions = filePointer.getLocations(); + + // The region contained by this FilePointer. + final String contig = regions.get(0).getContig(); + final int start = regions.get(0).getStart(); + final int stop = regions.get(regions.size()-1).getStop(); + + return genomeLocParser.createGenomeLoc(contig,start,stop); + } + + /** + * Required main method implementation. + * @param argv Command-line argument text. + * @throws Exception on error. + */ + public static void main(String[] argv) throws Exception { + int returnCode = 0; + try { + FindLargeShards instance = new FindLargeShards(); + start(instance, argv); + returnCode = 0; + } + catch(Exception ex) { + returnCode = 1; + ex.printStackTrace(); + throw ex; + } + finally { + System.exit(returnCode); + } + } +} diff --git a/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java new file mode 100644 index 000000000..9beb7895b --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.text; + +import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * A collection of convenience methods for working with list files. + */ +public class ListFileUtils { + /** + * Lines starting with this String in .list files are considered comments. + */ + public static final String LIST_FILE_COMMENT_START = "#"; + + /** + * Unpack the bam files to be processed, given a list of files. That list of files can + * itself contain entries which are lists of other files to be read (note: you cannot have lists + * of lists of lists). Lines in .list files containing only whitespace or which begin with + * LIST_FILE_COMMENT_START are ignored. + * + * @param samFiles The sam files, in string format. + * @return a flattened list of the bam files provided + */ + public static List unpackBAMFileList(final List samFiles, final ParsingEngine parser) { + List unpackedReads = new ArrayList(); + for( String inputFileName: samFiles ) { + Tags inputFileNameTags = parser.getTags(inputFileName); + inputFileName = expandFileName(inputFileName); + if (inputFileName.toLowerCase().endsWith(".list") ) { + try { + for ( String fileName : new XReadLines(new File(inputFileName), true) ) { + if ( fileName.length() > 0 && ! fileName.startsWith(LIST_FILE_COMMENT_START) ) { + unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); + } + } + } + catch( FileNotFoundException ex ) { + throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex); + } + } + else if(inputFileName.toLowerCase().endsWith(".bam")) { + unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); + } + else if(inputFileName.endsWith("stdin")) { + unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); + } + else { + throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " + + "with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " + + "of BAM files is in the correct format, update the extension, and try again.",inputFileName)); + } + } + return unpackedReads; + } + + /** + * Convert command-line argument representation of ROD bindings to something more easily understandable by the engine. + * @param RODBindings a text equivale + * @return a list of expanded, bound RODs. + */ + public static Collection unpackRODBindings(final List RODBindings, final String dbSNPFile, final ParsingEngine parser) { + Collection rodBindings = new ArrayList(); + + for (String fileName: RODBindings) { + final Tags tags = parser.getTags(fileName); + fileName = expandFileName(fileName); + + List positionalTags = tags.getPositionalTags(); + if(positionalTags.size() != 2) + throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " + + "Please use the following syntax when providing reference-ordered " + + "data: -B:, ."); + // Assume that if tags are present, those tags are name and type. + // Name is always first, followed by type. + String name = positionalTags.get(0); + String type = positionalTags.get(1); + + RMDTriplet.RMDStorageType storageType = null; + if(tags.getValue("storage") != null) + storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,tags.getValue("storage")); + else if(fileName.toLowerCase().endsWith("stdin")) + storageType = RMDTriplet.RMDStorageType.STREAM; + else + storageType = RMDTriplet.RMDStorageType.FILE; + + rodBindings.add(new RMDTriplet(name,type,fileName,storageType,tags)); + } + + if (dbSNPFile != null) { + if(dbSNPFile.toLowerCase().contains("vcf")) + throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf ."); + + final Tags tags = parser.getTags(dbSNPFile); + String fileName = expandFileName(dbSNPFile); + RMDTriplet.RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDTriplet.RMDStorageType.STREAM : RMDTriplet.RMDStorageType.FILE; + + rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType,tags)); + } + + return rodBindings; + } + + + /** + * Expand any special characters that appear in the filename. Right now, '-' is expanded to + * '/dev/stdin' only, but in the future, special characters like '~' and '*' that are passed + * directly to the command line in some circumstances could be expanded as well. Be careful + * when adding UNIX-isms. + * @param argument the text appearing on the command-line. + * @return An expanded string suitable for opening by Java/UNIX file handling utilities. + */ + private static String expandFileName(String argument) { + if(argument.trim().equals("-")) + return "/dev/stdin"; + return argument; + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/CommandLineExecutableUnitTest.java b/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java similarity index 93% rename from java/test/org/broadinstitute/sting/gatk/CommandLineExecutableUnitTest.java rename to java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java index 23828056f..f85a2f599 100644 --- a/java/test/org/broadinstitute/sting/gatk/CommandLineExecutableUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java @@ -22,11 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk; +package org.broadinstitute.sting.utils.text; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.testng.Assert; @@ -42,7 +43,7 @@ import java.util.List; /** * Tests selected functionality in the CommandLineExecutable class */ -public class CommandLineExecutableUnitTest extends BaseTest { +public class ListFileUtilsUnitTest extends BaseTest { @Test public void testIgnoreBlankLinesInBAMListFiles() throws Exception { @@ -90,13 +91,10 @@ public class CommandLineExecutableUnitTest extends BaseTest { List bamFiles = new ArrayList(); bamFiles.add(tempListFile.getAbsolutePath()); - GATKArgumentCollection argCollection = new GATKArgumentCollection(); - argCollection.samFiles = bamFiles; - CommandLineGATK testInstance = new CommandLineGATK(); testInstance.setParser(new ParsingEngine(testInstance)); - List unpackedBAMFileList = testInstance.unpackBAMFileList(argCollection); + List unpackedBAMFileList = ListFileUtils.unpackBAMFileList(bamFiles,new ParsingEngine(testInstance)); Assert.assertEquals(unpackedBAMFileList.size(), expectedUnpackedFileList.size(), "Unpacked BAM file list contains extraneous lines");