diff --git a/java/src/net/sf/samtools/GATKBAMFileSpan.java b/java/src/net/sf/samtools/GATKBAMFileSpan.java
index f19c3c56d..623f46291 100644
--- a/java/src/net/sf/samtools/GATKBAMFileSpan.java
+++ b/java/src/net/sf/samtools/GATKBAMFileSpan.java
@@ -64,6 +64,41 @@ public class GATKBAMFileSpan extends BAMFileSpan {
         super(Arrays.asList(chunks));
     }
 
+    /**
+     * Compares this span to another object chunk by chunk.
+     * NOTE(review): any BAMFileSpan is accepted; confirm that BAMFileSpan.equals()
+     * and hashCode() agree with this definition, otherwise symmetry is lost.
+     * @param other The object to compare against.
+     * @return True if other is a BAMFileSpan holding equal chunks in the same order.
+     */
+    @Override
+    public boolean equals(final Object other) {
+        if(!(other instanceof BAMFileSpan))
+            return false;
+
+        List<?> theseChunks = getChunks();
+        List<?> otherChunks = ((BAMFileSpan)other).getChunks();
+
+        if(theseChunks.size() != otherChunks.size())
+            return false;
+        for(int i = 0; i < theseChunks.size(); i++) {
+            if(!theseChunks.get(i).equals(otherChunks.get(i)))
+                return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Hashes the chunk list so that spans equal under equals() share a hash code.
+     * Assumes chunk hashCode() is consistent with chunk equals() -- TODO confirm.
+     * @return Hash code derived from the constituent chunks.
+     */
+    @Override
+    public int hashCode() {
+        return getChunks().hashCode();
+    }
+
     /**
      * Gets the constituent chunks stored in this span.
      * @return An unmodifiable list of chunks.
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java
index e322e0cff..1fa02f35f 100644
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java
@@ -30,6 +30,7 @@ import net.sf.samtools.SAMFileSpan;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
 
@@ -71,6 +72,49 @@ class FilePointer {
         this.isRegionUnmapped = false;
     }
 
+    /**
+     * Compares this file pointer to another object by intervals and file spans.
+     * @param other The object to compare against.
+     * @return True if other is a FilePointer whose locations and reader/span entries
+     *         equal this pointer's, element by element in iteration order.
+     */
+    @Override
+    public boolean equals(final Object other) {
+        if(!(other instanceof FilePointer))
+            return false;
+        FilePointer otherFilePointer = (FilePointer)other;
+
+        // intervals
+        if(this.locations.size() != otherFilePointer.locations.size())
+            return false;
+        for(int i = 0; i < locations.size(); i++) {
+            if(!this.locations.get(i).equals(otherFilePointer.locations.get(i)))
+                return false;
+        }
+
+        // fileSpans
+        if(this.fileSpans.size() != otherFilePointer.fileSpans.size())
+            return false;
+        Iterator<Map.Entry<SAMReaderID,SAMFileSpan>> thisEntries = this.fileSpans.entrySet().iterator();
+        Iterator<Map.Entry<SAMReaderID,SAMFileSpan>> otherEntries = otherFilePointer.fileSpans.entrySet().iterator();
+        while(thisEntries.hasNext() && otherEntries.hasNext()) {
+            if(!thisEntries.next().equals(otherEntries.next()))
+                return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Hashes the locations and file spans, the same state equals() compares.
+     * The value is derived from mutable state, so it changes when locations or file spans are added.
+     * @return Hash code consistent with equals().
+     */
+    @Override
+    public int hashCode() {
+        return 31 * locations.hashCode() + fileSpans.hashCode();
+    }
+
     public void addLocation(final GenomeLoc location) {
         locations.add(location);
     }
@@ -153,26 +197,40 @@ class FilePointer {
         PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> otherIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(other.fileSpans.entrySet().iterator());
 
         while(thisIterator.hasNext() || otherIterator.hasNext()) {
-            int compareValue = thisIterator.peek().getKey().compareTo(otherIterator.peek().getKey());
+            int compareValue;
+            if(!otherIterator.hasNext()) {
+                compareValue = -1;
+            }
+            else if(!thisIterator.hasNext())
+                compareValue = 1;
+            else
+                compareValue = thisIterator.peek().getKey().compareTo(otherIterator.peek().getKey());
 
-            if(compareValue < 0) {
-                // This before other.
-                Map.Entry<SAMReaderID,SAMFileSpan> entry = thisIterator.next();
-                combined.addFileSpans(entry.getKey(),entry.getValue());
-            }
-            else if(compareValue > 0) {
-                // Other before this.
-                Map.Entry<SAMReaderID,SAMFileSpan> entry = otherIterator.next();
-                combined.addFileSpans(entry.getKey(),entry.getValue());
-            }
-            else {
-                // equality; union the values.
-                SAMReaderID reader = thisIterator.peek().getKey();
-                GATKBAMFileSpan thisRegion = (GATKBAMFileSpan)thisIterator.next().getValue();
-                GATKBAMFileSpan otherRegion = (GATKBAMFileSpan)otherIterator.next().getValue();
-                combined.addFileSpans(reader,thisRegion.union(otherRegion));
-            }
+            // This before other.
+            if(compareValue < 0)
+                mergeElementsInto(combined,thisIterator);
+            // Other before this.
+            else if(compareValue > 0)
+                mergeElementsInto(combined,otherIterator);
+            // equality; union the values.
+            else
+                mergeElementsInto(combined,thisIterator,otherIterator);
         }
         return combined;
     }
+
+    /**
+     * Advance each iterator by one entry and add the union of their spans under the first entry's key.
+     * @param combined Entry into which to roll the next element.
+     * @param iterators Sources of next elements; at least one is required.
+     */
+    private void mergeElementsInto(final FilePointer combined, Iterator<Map.Entry<SAMReaderID,SAMFileSpan>>... iterators) {
+        if(iterators.length == 0)
+            throw new ReviewedStingException("Tried to add zero elements to an existing file pointer.");
+        Map.Entry<SAMReaderID,SAMFileSpan> initialElement = iterators[0].next();
+        GATKBAMFileSpan fileSpan = (GATKBAMFileSpan)initialElement.getValue();
+        for(int i = 1; i < iterators.length; i++)
+            fileSpan = fileSpan.union((GATKBAMFileSpan)iterators[i].next().getValue());
+        combined.addFileSpans(initialElement.getKey(),fileSpan);
+    }
 }
diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/reads/FilePointerUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/reads/FilePointerUnitTest.java
new file mode 100644
index 000000000..8be59b0c7
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/datasources/reads/FilePointerUnitTest.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads;
+
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import net.sf.samtools.GATKBAMFileSpan;
+import net.sf.samtools.GATKChunk;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
+import org.testng.Assert;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+
+/**
+ * Unit tests for FilePointer.combine(), checked through FilePointer.equals().
+ */
+public class FilePointerUnitTest extends BaseTest {
+    private IndexedFastaSequenceFile seq;
+    private GenomeLocParser genomeLocParser;
+    private SAMReaderID readerID = new SAMReaderID("samFile",new Tags());
+
+    /**
+     * This function does the setup of our parser, before each method call.
+     * <p/>
+     * Called before every test case method.
+     */
+    @BeforeMethod
+    public void doForEachTest() throws FileNotFoundException {
+        // sequence
+        seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference));
+        genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary());
+    }
+
+    @Test
+    public void testFilePointerCombineDisjoint() {
+        FilePointer one = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5));
+        one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1)));
+        FilePointer two = new FilePointer(genomeLocParser.createGenomeLoc("chr1",6,10));
+        two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,2)));
+
+        FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,10));
+        result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2)));
+
+        Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect");
+        Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect");
+    }
+
+    @Test
+    public void testFilePointerCombineJoint() {
+        FilePointer one = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5));
+        one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2)));
+        FilePointer two = new FilePointer(genomeLocParser.createGenomeLoc("chr1",2,6));
+        two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,3)));
+
+        FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,6));
+        result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,3)));
+
+        Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect");
+        Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect");
+    }
+
+    @Test
+    public void testFilePointerCombineOneSided() {
+        FilePointer filePointer = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5));
+        filePointer.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1)));
+        FilePointer empty = new FilePointer(genomeLocParser.createGenomeLoc("chr1",6,10));
+        // Do not add file spans to empty result
+
+        FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,10));
+        result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1)));
+        Assert.assertEquals(filePointer.combine(genomeLocParser,empty),result,"Combination of two file pointers is incorrect");
+        Assert.assertEquals(empty.combine(genomeLocParser,filePointer),result,"Combination of two file pointers is incorrect");
+    }
+}