gatk-3.8/public/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java

387 lines
18 KiB
Java
Raw Normal View History

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
// the imports for unit testing.
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
* Basic unit test for GenomeLoc
*/
public class GenomeLocUnitTest extends BaseTest {
// Reference sequence shared by the tests; assigned in init() from the file at hg18Reference.
private static ReferenceSequenceFile seq;
// Parser built from seq in init(); tests below use it to create GenomeLocs on the reference contigs.
private GenomeLocParser genomeLocParser;
/**
 * One-time setup: opens the reference at {@code hg18Reference} and builds the
 * {@link GenomeLocParser} that the tests in this class share.
 *
 * @throws FileNotFoundException presumably when the reference file cannot be opened
 */
@BeforeClass
public void init() throws FileNotFoundException {
    final File referenceFasta = new File(hg18Reference);
    seq = new CachingIndexedFastaSequenceFile(referenceFasta);
    genomeLocParser = new GenomeLocParser(seq);
}
/**
 * Checks {@code isBetween} using three single-base loci on chr1 (positions 1, 3 and 5):
 * only the middle locus is expected to lie between the other two.
 */
@Test
public void testIsBetween() {
    logger.warn("Executing testIsBetween");

    final GenomeLoc middle = genomeLocParser.createGenomeLoc("chr1", 3, 3);
    final GenomeLoc left = genomeLocParser.createGenomeLoc("chr1", 1, 1);
    final GenomeLoc right = genomeLocParser.createGenomeLoc("chr1", 5, 5);

    // Position 3 sits between 1 and 5 ...
    Assert.assertTrue(middle.isBetween(left, right));
    // ... while neither outer locus is between the remaining two.
    Assert.assertFalse(left.isBetween(middle, right));
    Assert.assertFalse(right.isBetween(left, middle));
}
/**
 * Verifies the contig name and contig index reported for locations on chr1 and chrX,
 * and that a location created from the name of the dictionary's sequence at index 1
 * compares equal to one created from the literal name "chr1".
 */
@Test
public void testContigIndex() {
    logger.warn("Executing testContigIndex");

    // TestNG's assertEquals takes (actual, expected); keeping that order makes
    // failure messages report the right value as "expected".
    GenomeLoc locOne = genomeLocParser.createGenomeLoc("chr1",1,1);
    Assert.assertEquals(locOne.getContigIndex(), 1);
    Assert.assertEquals(locOne.getContig(), "chr1");

    GenomeLoc locX = genomeLocParser.createGenomeLoc("chrX",1,1);
    Assert.assertEquals(locX.getContigIndex(), 23);
    Assert.assertEquals(locX.getContig(), "chrX");

    // Look the contig name up through the sequence dictionary instead of hard-coding it.
    GenomeLoc locNumber = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(),1,1);
    Assert.assertEquals(locNumber.getContigIndex(), 1);
    Assert.assertEquals(locNumber.getContig(), "chr1");
    Assert.assertEquals(locOne.compareTo(locNumber), 0);
}
/**
 * Checks {@code compareTo} for loci on the same contig (ordered by position) and on
 * different contigs (chr1 is expected to order before chr2 regardless of position).
 */
@Test
public void testCompareTo() {
    logger.warn("Executing testCompareTo");

    final GenomeLoc chr2Pos1 = genomeLocParser.createGenomeLoc("chr2", 1);
    final GenomeLoc chr2Pos5 = genomeLocParser.createGenomeLoc("chr2", 5);
    final GenomeLoc chr2Pos5Again = genomeLocParser.createGenomeLoc("chr2", 5);

    // Same contig: identical loci compare as 0, otherwise the position decides.
    Assert.assertEquals(chr2Pos5.compareTo(chr2Pos5Again), 0);
    Assert.assertEquals(chr2Pos1.compareTo(chr2Pos5), -1);
    Assert.assertEquals(chr2Pos5.compareTo(chr2Pos1), 1);

    // Different contigs: chr1:5 still comes before chr2:1.
    final GenomeLoc chr1Pos5 = genomeLocParser.createGenomeLoc("chr1", 5);
    Assert.assertEquals(chr1Pos5.compareTo(chr2Pos1), -1);
    Assert.assertEquals(chr2Pos1.compareTo(chr1Pos5), 1);
}
/**
 * Sorts small lists that mix mapped locations with {@code GenomeLoc.UNMAPPED} and expects
 * the unmapped entry to end up last, whatever position it started in.
 */
@Test
public void testUnmappedSort() {
    final GenomeLoc chr1 = genomeLocParser.createGenomeLoc("chr1",1,10000000);
    final GenomeLoc chr2 = genomeLocParser.createGenomeLoc("chr2",1,10000000);
    final GenomeLoc unmapped = GenomeLoc.UNMAPPED;

    // A list holding only the unmapped location.
    final List<GenomeLoc> unmappedOnly = Arrays.asList(unmapped);
    Collections.sort(unmappedOnly);
    Assert.assertEquals(unmappedOnly.size(),1,"Wrong number of elements in unmapped-only list.");
    Assert.assertEquals(unmappedOnly.get(0),unmapped,"List sorted in wrong order");

    // Expected outcomes; these lists are only read, never sorted.
    final List<GenomeLoc> chr1ThenUnmapped = Arrays.asList(chr1,unmapped);
    final List<GenomeLoc> chr1Chr2ThenUnmapped = Arrays.asList(chr1,chr2,unmapped);

    // Two elements: already in order, then inverted.
    assertSortsInto(Arrays.asList(chr1,unmapped), chr1ThenUnmapped,
            "Wrong number of elements in chr1,unmapped list.");
    assertSortsInto(Arrays.asList(unmapped,chr1), chr1ThenUnmapped,
            "Wrong number of elements in chr1,unmapped list.");

    // Three elements: unmapped at the end, at the front, and in the middle.
    assertSortsInto(Arrays.asList(chr1,chr2,unmapped), chr1Chr2ThenUnmapped,
            "Wrong number of elements in chr1,chr2,unmapped list.");
    assertSortsInto(Arrays.asList(unmapped,chr1,chr2), chr1Chr2ThenUnmapped,
            "Wrong number of elements in unmapped,chr1,chr2 list.");
    assertSortsInto(Arrays.asList(chr1,unmapped,chr2), chr1Chr2ThenUnmapped,
            "Wrong number of elements in chr1,unmapped,chr2 list.");
}

/** Sorts {@code toSort} in place, then checks its size and element order against {@code expectedOrder}. */
private void assertSortsInto(final List<GenomeLoc> toSort, final List<GenomeLoc> expectedOrder, final String sizeMessage) {
    Collections.sort(toSort);
    Assert.assertEquals(toSort.size(),expectedOrder.size(),sizeMessage);
    Assert.assertEquals(toSort,expectedOrder,"List sorted in wrong order");
}
/**
 * Runs {@code IntervalUtils.sortAndMergeIntervals} with the OVERLAPPING_ONLY rule over lists
 * containing {@code GenomeLoc.UNMAPPED}: repeated unmapped entries are expected to collapse
 * into one, and that entry is expected to follow the mapped chr1 interval.
 */
@Test
public void testUnmappedMerge() {
    final GenomeLoc chr1 = genomeLocParser.createGenomeLoc("chr1",1,10000000);
    final GenomeLoc unmapped = GenomeLoc.UNMAPPED;
    final List<GenomeLoc> chr1ThenUnmapped = Arrays.asList(chr1,unmapped);

    // One unmapped entry stays as it is.
    final List<GenomeLoc> mergedSingle = mergeOverlappingOnly(Arrays.asList(unmapped));
    Assert.assertEquals(mergedSingle.size(),1,"Wrong number of elements in list.");
    Assert.assertEquals(mergedSingle.get(0),unmapped,"List sorted in wrong order");

    // Two unmapped entries merge into one.
    final List<GenomeLoc> mergedPair = mergeOverlappingOnly(Arrays.asList(unmapped,unmapped));
    Assert.assertEquals(mergedPair.size(),1,"Wrong number of elements in list.");
    Assert.assertEquals(mergedPair.get(0),unmapped,"List sorted in wrong order");

    // chr1 followed by two unmapped entries.
    final List<GenomeLoc> mergedTrailing = mergeOverlappingOnly(Arrays.asList(chr1,unmapped,unmapped));
    Assert.assertEquals(mergedTrailing.size(),2,"Wrong number of elements in list.");
    Assert.assertEquals(mergedTrailing,chr1ThenUnmapped,"List sorted in wrong order");

    // chr1 sandwiched between two unmapped entries.
    final List<GenomeLoc> mergedMixed = mergeOverlappingOnly(Arrays.asList(unmapped,chr1,unmapped));
    Assert.assertEquals(mergedMixed.size(),2,"Wrong number of elements in list.");
    Assert.assertEquals(mergedMixed,chr1ThenUnmapped,"List sorted in wrong order");
}

/** Sorts and merges {@code intervals} with the OVERLAPPING_ONLY rule and returns the result as a list. */
private List<GenomeLoc> mergeOverlappingOnly(final List<GenomeLoc> intervals) {
    return IntervalUtils.sortAndMergeIntervals(genomeLocParser,intervals,IntervalMergingRule.OVERLAPPING_ONLY).toList();
}
// -------------------------------------------------------------------------------------
//
// testing overlap detection
//
// -------------------------------------------------------------------------------------
/**
 * One overlap test case: two chr1 intervals plus the overlap size and reciprocal overlap
 * fraction expected for them, both derived here by brute force rather than through
 * the {@code GenomeLoc} methods under test.
 */
private class ReciprocalOverlapProvider extends TestDataProvider {
    GenomeLoc gl1, gl2;
    int overlapSize;
    double overlapFraction;

    /**
     * @param start1 first position of the first interval
     * @param stop1  last position of the first interval
     * @param start2 first position of the second interval
     * @param stop2  last position of the second interval
     */
    private ReciprocalOverlapProvider(int start1, int stop1, int start2, int stop2) {
        super(ReciprocalOverlapProvider.class);
        gl1 = genomeLocParser.createGenomeLoc("chr1", start1, stop1);
        gl2 = genomeLocParser.createGenomeLoc("chr1", start2, stop2);

        // Count every position of the first interval that also falls inside the second.
        int sharedPositions = 0;
        for ( int pos = start1; pos <= stop1; pos++ ) {
            final boolean insideSecond = pos >= start2 && pos <= stop2;
            if ( insideSecond )
                sharedPositions++;
        }
        overlapSize = sharedPositions;

        // The reciprocal fraction is the smaller of the two per-interval fractions.
        final double fractionOfFirst = (1.0*sharedPositions)/gl1.size();
        final double fractionOfSecond = (1.0*sharedPositions)/gl2.size();
        overlapFraction = Math.min(fractionOfFirst, fractionOfSecond);

        super.setName(String.format("%d-%d / %d-%d overlap=%d / %.2f", start1, stop1, start2, stop2, overlapSize, overlapFraction));
    }
}
/**
 * Builds the overlap test cases: every interval [start, stop] with 1 <= start <= stop <= 10
 * is paired with five fixed intervals, once as the first argument and once as the second.
 *
 * @return whatever {@code getTests} reports for {@code ReciprocalOverlapProvider}; presumably
 *         the instances created here, collected by the {@code TestDataProvider} base class
 *         (not visible in this file)
 */
@DataProvider(name = "ReciprocalOverlapProvider")
public Object[][] makeReciprocalOverlapProvider() {
    // {start, stop} of the fixed partner intervals, in the order they are paired.
    final int[][] fixedIntervals = { {1, 10}, {5, 10}, {5, 7}, {5, 15}, {11, 20} };

    for ( int varStart = 1; varStart <= 10; varStart++ ) {
        for ( int varStop = varStart; varStop <= 10; varStop++ ) {
            // Varying interval first, fixed interval second ...
            for ( final int[] fixed : fixedIntervals )
                new ReciprocalOverlapProvider(varStart, varStop, fixed[0], fixed[1]);
            // ... then the same pairs with the roles swapped.
            for ( final int[] fixed : fixedIntervals )
                new ReciprocalOverlapProvider(fixed[0], fixed[1], varStart, varStop);
        }
    }
    return ReciprocalOverlapProvider.getTests(ReciprocalOverlapProvider.class);
}
/**
 * Checks overlap detection for one provider case: intervals sharing no position must not
 * overlap; otherwise the intersection size and the reciprocal overlap fraction must match
 * the values the provider computed by brute force.
 *
 * @param cfg the interval pair together with its expected overlap size and fraction
 */
@Test(dataProvider = "ReciprocalOverlapProvider")
public void testReciprocalOverlapProvider(ReciprocalOverlapProvider cfg) {
    // Both fractions are computed floating-point values, so compare within a small
    // tolerance instead of requiring bit-for-bit equality.
    final double fractionTolerance = 1e-10;

    if ( cfg.overlapSize == 0 ) {
        Assert.assertFalse(cfg.gl1.overlapsP(cfg.gl2));
    } else {
        Assert.assertTrue(cfg.gl1.overlapsP(cfg.gl2));
        Assert.assertEquals(cfg.gl1.intersect(cfg.gl2).size(), cfg.overlapSize);
        Assert.assertEquals(cfg.gl1.reciprocialOverlapFraction(cfg.gl2), cfg.overlapFraction, fractionTolerance);
    }
}
// -------------------------------------------------------------------------------------
//
// testing comparison, hashcode, and equals
//
// -------------------------------------------------------------------------------------
/**
 * Builds {g1, g2, expected} triples for the comparison test. g1 is always on chr2 and starts
 * at 10; g2 ranges over chr1, chr2 and chr3 with starts and stops placed around g1's.
 *
 * @return one row per (g1, g2) pair, with the result expected from {@code g1.compareTo(g2)}
 */
@DataProvider(name = "GenomeLocComparisons")
public Object[][] createGenomeLocComparisons() {
    final List<Object[]> cases = new ArrayList<Object[]>();
    final List<String> contigs = Arrays.asList("chr1", "chr2", "chr3");
    final int start = 10;

    for ( int stop = start; stop < start + 3; stop++ ) {
        final GenomeLoc g1 = genomeLocParser.createGenomeLoc("chr2", start, stop);
        for ( final String contig : contigs ) {
            for ( int start2 = start - 1; start2 <= stop + 1; start2++ ) {
                for ( int stop2 = start2; stop2 < stop + 2; stop2++ ) {
                    final GenomeLoc g2 = genomeLocParser.createGenomeLoc(contig, start2, stop2);
                    final ComparisonResult expected = expectedComparison(contig, start, stop, start2, stop2);
                    cases.add(new Object[]{g1, g2, expected});
                }
            }
        }
    }
    return cases.toArray(new Object[cases.size()][]);
}

/**
 * Expected ordering of a chr2 interval [start, stop] relative to [start2, stop2] on
 * {@code contig}: the contig decides first, then the start, then the stop.
 */
private static ComparisonResult expectedComparison(final String contig, final int start, final int stop, final int start2, final int stop2) {
    if ( contig.equals("chr3") )
        return ComparisonResult.LESS_THAN;
    if ( contig.equals("chr1") )
        return ComparisonResult.GREATER_THAN;
    if ( start != start2 )
        return start < start2 ? ComparisonResult.LESS_THAN : ComparisonResult.GREATER_THAN;
    if ( stop != stop2 )
        return stop < stop2 ? ComparisonResult.LESS_THAN : ComparisonResult.GREATER_THAN;
    return ComparisonResult.EQUALS;
}
/** Expected outcome of a comparison, paired with the integer {@code compareTo} should return. */
private enum ComparisonResult {
    LESS_THAN(-1),
    EQUALS(0),
    GREATER_THAN(1);

    /** Value expected from {@code compareTo} for this outcome. */
    final int cmp;

    // Enum constructors are implicitly private, so no modifier is needed.
    ComparisonResult(int value) {
        cmp = value;
    }
}
/**
 * Checks that {@code compareTo}, {@code equals} and {@code hashCode} agree with the
 * expected relationship between two locations.
 *
 * @param g1       left-hand location
 * @param g2       right-hand location
 * @param expected how g1 is expected to compare to g2
 */
@Test(dataProvider = "GenomeLocComparisons")
public void testGenomeLocComparisons(GenomeLoc g1, GenomeLoc g2, ComparisonResult expected) {
    final boolean shouldBeEqual = expected == ComparisonResult.EQUALS;

    Assert.assertEquals(g1.compareTo(g2), expected.cmp, "Comparing genome locs failed");
    Assert.assertEquals(g1.equals(g2), shouldBeEqual);

    // Equal locations must also hash alike.
    if ( shouldBeEqual ) {
        Assert.assertEquals(g1.hashCode(), g2.hashCode(), "Equal genome locs don't have the same hash code");
    }
}
// -------------------------------------------------------------------------------------
//
// testing merging functionality
//
// -------------------------------------------------------------------------------------
// Fixtures for the merge tests, built directly with the GenomeLoc constructor rather than
// through a parser. Arguments are presumably (contig name, contig index, start, stop) - TODO
// confirm against GenomeLoc. Under that reading the three cover 10-20, 21-30 and 31-40 on
// contig "1", so each begins right after the previous one ends, while loc1 and loc3 alone
// leave a gap.
private static final GenomeLoc loc1 = new GenomeLoc("1", 0, 10, 20);
private static final GenomeLoc loc2 = new GenomeLoc("1", 0, 21, 30);
private static final GenomeLoc loc3 = new GenomeLoc("1", 0, 31, 40);
/** Holds one list of locations so it can be handed to a test as a single data-provider argument. */
private class MergeTest {
    /** The locations to merge, in the order they are to be merged. */
    public List<GenomeLoc> locs;

    private MergeTest(final List<GenomeLoc> locs) {
        this.locs = locs;
    }
}
/**
 * Supplies the merge test cases: the first one, two and three of the class-level
 * locations {@code loc1}, {@code loc2}, {@code loc3}.
 *
 * @return one row per case, each holding a single {@code MergeTest}
 */
@DataProvider(name = "SGLtest")
public Object[][] createFindVariantRegionsData() {
    return new Object[][] {
            { new MergeTest(Arrays.<GenomeLoc>asList(loc1)) },
            { new MergeTest(Arrays.<GenomeLoc>asList(loc1, loc2)) },
            { new MergeTest(Arrays.<GenomeLoc>asList(loc1, loc2, loc3)) },
    };
}
/**
 * Runs the shared merge checks on one data-provider case.
 *
 * @param test wrapper around the list of locations to merge
 */
@Test(dataProvider = "SGLtest")
public void testSimpleGenomeLoc(MergeTest test) {
    final List<GenomeLoc> locsToMerge = test.locs;
    testMerge(locsToMerge);
}
/**
 * Runs the merge checks on {@code loc1} and {@code loc3} only, skipping {@code loc2};
 * the test passes only if a {@code ReviewedStingException} is thrown.
 */
@Test(expectedExceptions = ReviewedStingException.class)
public void testNotContiguousLocs() {
    final List<GenomeLoc> gappedLocs = Arrays.asList(loc1, loc3);
    testMerge(gappedLocs);
}
/**
 * Merges {@code locs} two ways, pairwise from left to right and all at once through a
 * sorted set, and checks that both give the same location, spanning from the first
 * element's start to the last element's stop.
 *
 * @param locs the locations to merge, in order; must hold at least one element
 */
private void testMerge(final List<GenomeLoc> locs) {
    final GenomeLoc first = locs.get(0);

    // Fold the remaining locations into the first, one pair at a time.
    GenomeLoc pairwise = first;
    for ( final GenomeLoc next : locs.subList(1, locs.size()) ) {
        pairwise = GenomeLoc.merge(pairwise, next);
    }

    // Merge the same locations in a single call on a sorted set.
    final GenomeLoc allAtOnce = GenomeLoc.merge(new TreeSet<GenomeLoc>(locs));

    final GenomeLoc last = locs.get(locs.size() - 1);
    Assert.assertEquals(pairwise, allAtOnce);
    Assert.assertEquals(pairwise.getStart(), first.getStart());
    Assert.assertEquals(pairwise.getStop(), last.getStop());
}
// -------------------------------------------------------------------------------------
//
// testing distance functionality
//
// -------------------------------------------------------------------------------------
/**
 * Checks {@code distanceAcrossContigs} against hand-computed expectations on an artificial
 * header, for single-base loci and for intervals, on the same contig and across contigs,
 * calling it in both argument orders each time.
 */
@Test(enabled=true)
public void testDistanceAcrossContigs() {
    final int chrSize = 1000;
    // Presumably 10 contigs (chr0..chr9) of chrSize bases each - the expectations below assume so.
    SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(10, 0, chrSize);
    GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());

    // Local names are kept distinct from the class-level loc1/loc2/loc3 merge fixtures so
    // that nothing in this method shadows them.
    final GenomeLoc chr3Pos500 = parser.createGenomeLoc("chr3", 500); // to check regular case
    final GenomeLoc chr7Pos200 = parser.createGenomeLoc("chr7", 200); // to check regular case
    final GenomeLoc chr0Pos1 = parser.createGenomeLoc("chr0", 1); // to check corner case
    final GenomeLoc chr9Pos1000 = parser.createGenomeLoc("chr9", 1000);// to check corner case
    final GenomeLoc chr7Pos500 = parser.createGenomeLoc("chr7", 500); // to make sure it does the right thing when in the same chromosome
    final GenomeLoc chr7From200To300 = parser.createGenomeLoc("chr7", 200, 300);
    final GenomeLoc chr7From500To600 = parser.createGenomeLoc("chr7", 500, 600);
    final GenomeLoc chr9From500To600 = parser.createGenomeLoc("chr9", 500, 600);

    // Locus comparisons
    Assert.assertEquals(chr3Pos500.distanceAcrossContigs(chr7Pos200, header), 3*chrSize + chrSize-chr3Pos500.getStop() + chr7Pos200.getStart()); // simple case, smaller first
    Assert.assertEquals(chr7Pos200.distanceAcrossContigs(chr3Pos500, header), 3*chrSize + chrSize-chr3Pos500.getStop() + chr7Pos200.getStart()); // simple case, bigger first
    Assert.assertEquals(chr0Pos1.distanceAcrossContigs(chr9Pos1000, header), 10*chrSize - 1); // corner case, smaller first
    Assert.assertEquals(chr9Pos1000.distanceAcrossContigs(chr0Pos1, header), 10*chrSize - 1); // corner case, bigger first
    Assert.assertEquals(chr7Pos200.distanceAcrossContigs(chr7Pos500, header), 300); // same contig, smaller first
    Assert.assertEquals(chr7Pos500.distanceAcrossContigs(chr7Pos200, header), 300); // same contig, bigger first

    // Interval comparisons (stop is read through getStop(), as in the locus checks above)
    Assert.assertEquals(chr7From200To300.distanceAcrossContigs(chr7From500To600, header), 200); // same contig, smaller first
    Assert.assertEquals(chr7From500To600.distanceAcrossContigs(chr7From200To300, header), 200); // same contig, bigger first
    Assert.assertEquals(chr7From500To600.distanceAcrossContigs(chr9From500To600, header), chrSize + chrSize-chr7From500To600.getStop() + chr9From500To600.getStart()); // across contigs, smaller first
    Assert.assertEquals(chr9From500To600.distanceAcrossContigs(chr7From500To600, header), chrSize + chrSize-chr7From500To600.getStop() + chr9From500To600.getStart()); // across contigs, bigger first
}
}