Fix for bug GSA-430: the same BAM file must not be specified twice on the command line. An ArgumentException with an appropriate error message and a list of the duplicate BAMs is now thrown in this case.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5542 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
droazen 2011-03-30 22:23:24 +00:00
parent deab9f0aa5
commit 7b452ea2b9
3 changed files with 110 additions and 1 deletions

View File

@ -306,7 +306,8 @@ public class GenomeAnalysisEngine {
}
/**
* Verifies that the supplied set of reads files mesh with what the walker says it requires.
* Verifies that the supplied set of reads files mesh with what the walker says it requires,
* and also makes sure that there were no duplicate SAM files specified on the command line.
*/
protected void validateSuppliedReads() {
GATKArgumentCollection arguments = this.getArguments();
@ -317,6 +318,32 @@ public class GenomeAnalysisEngine {
// Check what the walker says is allowed against what was provided on the command line.
if ((arguments.samFiles != null && arguments.samFiles.size() > 0) && !WalkerManager.isAllowed(walker, DataSource.READS))
throw new ArgumentException("Walker does not allow reads but reads were provided.");
// Make sure no SAM files were specified multiple times by the user.
checkForDuplicateSamFiles();
}
/**
 * Scans the fully unpacked list of SAM files (samReaderIDs) for entries that occur more than once.
 *
 * @throws ArgumentException if at least one SAM file occurs more than once; the message lists
 *                           the path (as given by getSamFilePath()) of each repeated file
 */
protected void checkForDuplicateSamFiles() {
    final Set<SAMReaderID> seenReaders = new HashSet<SAMReaderID>();
    // A LinkedHashSet reports each repeated path only once, in the order the repeats were found.
    final Set<String> repeatedPaths = new LinkedHashSet<String>();

    for ( final SAMReaderID readerID : samReaderIDs ) {
        // Set.add() returns false when an equal element is already in the set.
        final boolean firstSighting = seenReaders.add(readerID);
        if ( !firstSighting ) {
            repeatedPaths.add(readerID.getSamFilePath());
        }
    }

    if ( repeatedPaths.isEmpty() ) {
        return;
    }

    throw new ArgumentException("The following BAM files appear multiple times in the list of input files: " +
                                repeatedPaths + " BAM files may be specified at most once.");
}
/**

View File

@ -41,6 +41,19 @@ public class SAMReaderID implements Comparable {
this(new File(samFileName),tags);
}
/**
 * Returns the absolute path of the SAM file backing this reader.
 * @return The value of getAbsolutePath() for this reader's SAM file,
 *         or null when this reader has no SAM file set
 */
public String getSamFilePath() {
    return ( samFile != null ) ? samFile.getAbsolutePath() : null;
}
/**
* Gets the tags associated with the given BAM file.
* @return A collection of the tags associated with this file.

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk;
import org.testng.annotations.Test;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.ArgumentException;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.commandline.Tags;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
/**
 * Unit tests for selected pieces of GenomeAnalysisEngine behavior.
 */
public class GenomeAnalysisEngineUnitTest extends BaseTest {

    /**
     * Listing the same BAM file twice must be rejected with an ArgumentException.
     */
    @Test(expectedExceptions=ArgumentException.class)
    public void testDuplicateSamFileHandlingSingleDuplicate() throws Exception {
        runDuplicateCheck("testdata/exampleBAM.bam",
                          "testdata/exampleBAM.bam");
    }

    /**
     * Two different BAM files, each listed twice, must be rejected with an ArgumentException.
     */
    @Test(expectedExceptions=ArgumentException.class)
    public void testDuplicateSamFileHandlingMultipleDuplicates() throws Exception {
        runDuplicateCheck("testdata/exampleBAM.bam",
                          "testdata/exampleNORG.bam",
                          "testdata/exampleBAM.bam",
                          "testdata/exampleNORG.bam");
    }

    /**
     * Builds a fresh engine, hands it one SAMReaderID (with empty Tags) per supplied path,
     * in the order given, and then runs the duplicate-SAM-file check on it.
     *
     * @param bamPaths paths of the BAM files to register with the engine
     * @throws Exception whatever the engine calls throw; not caught here so the
     *                   calling test's expectedExceptions can observe it
     */
    private static void runDuplicateCheck(final String... bamPaths) throws Exception {
        final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
        final Collection<SAMReaderID> readerIDs = new ArrayList<SAMReaderID>();
        for ( final String bamPath : bamPaths ) {
            readerIDs.add(new SAMReaderID(new File(bamPath), new Tags()));
        }
        engine.setSAMFileIDs(readerIDs);
        engine.checkForDuplicateSamFiles();
    }
}