diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index c924001b1..239b9ad1a 100755
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -306,7 +306,8 @@ public class GenomeAnalysisEngine {
     }
 
     /**
-     * Verifies that the supplied set of reads files mesh with what the walker says it requires.
+     * Verifies that the supplied set of reads files mesh with what the walker says it requires,
+     * and also makes sure that there were no duplicate SAM files specified on the command line.
      */
     protected void validateSuppliedReads() {
         GATKArgumentCollection arguments = this.getArguments();
@@ -317,6 +318,33 @@ public class GenomeAnalysisEngine {
         // Check what the walker says is allowed against what was provided on the command line.
         if ((arguments.samFiles != null && arguments.samFiles.size() > 0) && !WalkerManager.isAllowed(walker, DataSource.READS))
             throw new ArgumentException("Walker does not allow reads but reads were provided.");
+
+        // Make sure no SAM files were specified multiple times by the user.
+        checkForDuplicateSamFiles();
+    }
+
+    /**
+     * Checks whether there are SAM files that appear multiple times in the fully unpacked list of
+     * SAM files (samReaderIDs). If there are, throws an ArgumentException listing the files in question.
+     *
+     * @throws ArgumentException if any SAM file appears more than once in samReaderIDs
+     */
+    protected void checkForDuplicateSamFiles() {
+        Set<SAMReaderID> encounteredSamFiles = new HashSet<SAMReaderID>();
+        // Insertion-ordered so that duplicates are reported in the order they were encountered.
+        Set<String> duplicateSamFiles = new LinkedHashSet<String>();
+
+        for ( SAMReaderID samFile : samReaderIDs ) {
+            // Set.add() returns false when the element is already present, i.e. this file is a repeat.
+            if ( ! encounteredSamFiles.add(samFile) ) {
+                duplicateSamFiles.add(samFile.getSamFilePath());
+            }
+        }
+
+        if ( ! duplicateSamFiles.isEmpty() ) {
+            throw new ArgumentException("The following BAM files appear multiple times in the list of input files: " +
+                                        duplicateSamFiles + ". BAM files may be specified at most once.");
+        }
     }
 
     /**
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java
index 3eee05a54..c84db7770 100644
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java
@@ -41,6 +41,19 @@ public class SAMReaderID implements Comparable {
         this(new File(samFileName),tags);
     }
 
+    /**
+     * Gets the absolute pathname of this SAM file
+     * @return The absolute pathname of this reader's SAM file,
+     *         or null if this reader has no associated SAM file
+     */
+    public String getSamFilePath() {
+        if ( samFile == null ) {
+            return null;
+        }
+
+        return samFile.getAbsolutePath();
+    }
+
     /**
      * Gets the tags associated with the given BAM file.
      * @return A collection of the tags associated with this file.
diff --git a/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java
new file mode 100644
index 000000000..30bf2db07
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk;
+
+import org.testng.annotations.Test;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.commandline.ArgumentException;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.commandline.Tags;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+
+
+/**
+ * Tests selected functionality in the GenomeAnalysisEngine class
+ */
+public class GenomeAnalysisEngineUnitTest extends BaseTest {
+
+    /** The same BAM file listed twice must be rejected with an ArgumentException. */
+    @Test(expectedExceptions=ArgumentException.class)
+    public void testDuplicateSamFileHandlingSingleDuplicate() throws Exception {
+        GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
+
+        Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
+        samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
+        samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
+
+        testEngine.setSAMFileIDs(samFiles);
+        testEngine.checkForDuplicateSamFiles();
+    }
+
+    /** Two different BAM files, each listed twice and interleaved, must be rejected with an ArgumentException. */
+    @Test(expectedExceptions=ArgumentException.class)
+    public void testDuplicateSamFileHandlingMultipleDuplicates() throws Exception {
+        GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
+
+        Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
+        samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
+        samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
+        samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
+        samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
+
+        testEngine.setSAMFileIDs(samFiles);
+        testEngine.checkForDuplicateSamFiles();
+    }
+
+    /** Two different BAM files, each listed once, must pass the duplicate check without an exception. */
+    @Test
+    public void testDuplicateSamFileHandlingNoDuplicates() throws Exception {
+        GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
+
+        Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
+        samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
+        samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
+
+        testEngine.setSAMFileIDs(samFiles);
+        testEngine.checkForDuplicateSamFiles();
+    }
+}