From c15751e41eecc9bdf7d2d4da4f39b95870ae27e8 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 15 Jul 2013 13:28:07 -0400 Subject: [PATCH] SAMReaderID: fix bug with hash code and equals() method -Two SAMReaderIDs that pointed at the same underlying bam file through a relative vs. an absolute path were not being treated as equal, and had different hash codes. This was causing problems in the engine, since SAMReaderIDs are often used as the keys of HashMaps. -Fix: explicitly use the absolute path to the encapsulated bam file in hashCode() and equals() -Added tests to ensure this doesn't break again --- .../sting/gatk/GenomeAnalysisEngine.java | 4 +- .../gatk/datasources/reads/SAMReaderID.java | 4 +- .../gatk/GenomeAnalysisEngineUnitTest.java | 18 ++++++- .../reads/SAMReaderIDUnitTest.java | 49 +++++++++++++++++++ 4 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c4f1a286d..f01e8ad62 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -530,8 +530,8 @@ public class GenomeAnalysisEngine { } if ( duplicateSamFiles.size() > 0 ) { - throw new ArgumentException("The following BAM files appear multiple times in the list of input files: " + - duplicateSamFiles + " BAM files may be specified at most once."); + throw new UserException("The following BAM files appear multiple times in the list of input files: " + + duplicateSamFiles + " BAM files may be specified at most once."); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java index 7efab5fb0..72c037707 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java @@ -98,7 +98,7 @@ public class SAMReaderID implements Comparable { if(!(other instanceof SAMReaderID)) return false; SAMReaderID otherID = (SAMReaderID)other; - return this.samFile.equals(otherID.samFile); + return this.getSamFilePath().equals(otherID.getSamFilePath()); } /** @@ -107,7 +107,7 @@ public class SAMReaderID implements Comparable { */ @Override public int hashCode() { - return samFile.hashCode(); + return samFile.getAbsolutePath().hashCode(); } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index 3f74e0eae..c3bc78551 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -52,7 +52,7 @@ import java.util.List; */ public class GenomeAnalysisEngineUnitTest extends BaseTest { - @Test(expectedExceptions=ArgumentException.class) + @Test(expectedExceptions=UserException.class) public void testDuplicateSamFileHandlingSingleDuplicate() throws Exception { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); @@ -64,7 +64,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { testEngine.checkForDuplicateSamFiles(); } - @Test(expectedExceptions=ArgumentException.class) + @Test(expectedExceptions=UserException.class) public void testDuplicateSamFileHandlingMultipleDuplicates() throws Exception { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); @@ -78,6 +78,20 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { testEngine.checkForDuplicateSamFiles(); } + @Test(expectedExceptions=UserException.class) + public void testDuplicateSamFileHandlingAbsoluteVsRelativePath() { + GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); + + final File relativePathToBAMFile = new File("public/testdata/exampleBAM.bam"); + final File absolutePathToBAMFile = new File(relativePathToBAMFile.getAbsolutePath()); + Collection samFiles = new ArrayList(); + samFiles.add(new SAMReaderID(relativePathToBAMFile, new Tags())); + samFiles.add(new SAMReaderID(absolutePathToBAMFile, new Tags())); + + testEngine.setSAMFileIDs(samFiles); + testEngine.checkForDuplicateSamFiles(); + } + @Test public void testEmptyIntervalSetHandling() throws Exception { GenomeLocParser genomeLocParser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000).getSequenceDictionary()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java new file mode 100644 index 000000000..a594573e5 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.datasources.reads; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.Tags; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class SAMReaderIDUnitTest extends BaseTest { + + @Test + public void testSAMReaderIDHashingAndEquality() { + // Test to make sure that two SAMReaderIDs that point at the same file via an absolute vs. relative + // path are equal according to equals() and have the same hash code + final File relativePathToBAMFile = new File("public/testdata/exampleBAM.bam"); + final File absolutePathToBAMFile = new File(relativePathToBAMFile.getAbsolutePath()); + final SAMReaderID relativePathSAMReaderID = new SAMReaderID(relativePathToBAMFile, new Tags()); + final SAMReaderID absolutePathSAMReaderID = new SAMReaderID(absolutePathToBAMFile, new Tags()); + + Assert.assertEquals(relativePathSAMReaderID, absolutePathSAMReaderID, "Absolute-path and relative-path SAMReaderIDs not equal according to equals()"); + Assert.assertEquals(relativePathSAMReaderID.hashCode(), absolutePathSAMReaderID.hashCode(), "Absolute-path and relative-path SAMReaderIDs have different hash codes"); + } +}