2013-01-11 06:04:08 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2012 The Broad Institute
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
|
|
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
package org.broadinstitute.sting.utils;
|
|
|
|
|
|
2009-12-24 05:59:14 +08:00
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
import net.sf.samtools.SAMFileHeader;
|
2012-11-28 00:00:33 +08:00
|
|
|
import net.sf.samtools.SAMSequenceDictionary;
|
|
|
|
|
import net.sf.samtools.SAMSequenceRecord;
|
2013-01-30 05:51:39 +08:00
|
|
|
import org.broad.tribble.BasicFeature;
|
|
|
|
|
import org.broad.tribble.Feature;
|
2009-06-22 22:39:41 +08:00
|
|
|
import org.broadinstitute.sting.BaseTest;
|
2010-09-12 23:07:38 +08:00
|
|
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
Contracts for Java now written for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well-formed start, end, and contigs, with respect to the master dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utility functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilities from GenomeLocParser to IntervalUtils, where one would expect them to live
Removed GenomeLoc.clone() method, as this was not correctly implemented, and is actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to a different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
2013-01-30 05:51:39 +08:00
|
|
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
2009-06-22 22:39:41 +08:00
|
|
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
2013-01-30 05:51:39 +08:00
|
|
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|
|
|
|
import org.broadinstitute.variant.variantcontext.Allele;
|
|
|
|
|
import org.broadinstitute.variant.variantcontext.VariantContext;
|
|
|
|
|
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
|
|
|
|
import org.testng.Assert;
|
2010-11-02 05:31:44 +08:00
|
|
|
import org.testng.annotations.BeforeClass;
|
2011-11-18 02:53:46 +08:00
|
|
|
import org.testng.annotations.DataProvider;
|
2010-11-02 05:31:44 +08:00
|
|
|
import org.testng.annotations.Test;
|
2009-12-24 05:59:14 +08:00
|
|
|
|
2013-01-30 05:51:39 +08:00
|
|
|
import java.io.File;
|
|
|
|
|
import java.io.FileNotFoundException;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.LinkedList;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
|
|
|
|
|
import static org.testng.Assert.assertEquals;
|
|
|
|
|
import static org.testng.Assert.assertTrue;
|
|
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
/**
|
|
|
|
|
* @author aaron
|
|
|
|
|
* <p/>
|
2010-04-08 14:14:15 +08:00
|
|
|
* Class GenomeLocParserUnitTest
|
2009-06-22 22:39:41 +08:00
|
|
|
* <p/>
|
|
|
|
|
* Test out the functionality of the new genome loc parser
|
|
|
|
|
*/
|
2010-04-08 14:14:15 +08:00
|
|
|
public class GenomeLocParserUnitTest extends BaseTest {
|
2010-11-11 01:59:50 +08:00
|
|
|
// Parser under test; assigned in init() from the artificial header's sequence dictionary.
private GenomeLocParser genomeLocParser;
|
2013-01-30 05:51:39 +08:00
|
|
|
// Artificial SAM header created in init(); also used to build artificial reads and to look up the contig length.
private SAMFileHeader header;
|
2009-06-22 22:39:41 +08:00
|
|
|
|
2009-09-22 07:32:24 +08:00
|
|
|
@BeforeClass
|
2013-11-19 01:07:59 +08:00
|
|
|
public void init() {
|
2013-01-30 05:51:39 +08:00
|
|
|
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
2010-11-11 01:59:50 +08:00
|
|
|
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
2011-05-21 10:01:59 +08:00
|
|
|
@Test(expectedExceptions=UserException.MalformedGenomeLoc.class)
|
2009-06-22 22:39:41 +08:00
|
|
|
public void testGetContigIndex() {
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
assertEquals(genomeLocParser.getContigIndex("blah"), -1); // should not be in the reference
|
2010-04-01 20:47:48 +08:00
|
|
|
}
|
2009-06-22 22:39:41 +08:00
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testGetContigIndexValid() {
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
assertEquals(genomeLocParser.getContigIndex("chr1"), 0); // should be in the reference
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
Contracts for Java now written for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well-formed start, end, and contigs, with respect to the master dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utility functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilities from GenomeLocParser to IntervalUtils, where one would expect them to live
Removed GenomeLoc.clone() method, as this was not correctly implemented, and is actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to a different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Test(expectedExceptions=UserException.class)
|
|
|
|
|
public void testGetContigInfoUnknownContig1() {
|
|
|
|
|
assertEquals(null, genomeLocParser.getContigInfo("blah")); // should *not* be in the reference
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions=UserException.class)
|
|
|
|
|
public void testGetContigInfoUnknownContig2() {
|
|
|
|
|
assertEquals(null, genomeLocParser.getContigInfo(null)); // should *not* be in the reference
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test()
|
|
|
|
|
public void testHasContigInfoUnknownContig1() {
|
|
|
|
|
assertEquals(false, genomeLocParser.contigIsInDictionary("blah")); // should *not* be in the reference
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test()
|
|
|
|
|
public void testHasContigInfoUnknownContig2() {
|
|
|
|
|
assertEquals(false, genomeLocParser.contigIsInDictionary(null)); // should *not* be in the reference
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
Contracts for Java now written for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well-formed start, end, and contigs, with respect to the master dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utility functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilities from GenomeLocParser to IntervalUtils, where one would expect them to live
Removed GenomeLoc.clone() method, as this was not correctly implemented, and is actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to a different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Test()
|
|
|
|
|
public void testHasContigInfoKnownContig() {
|
|
|
|
|
assertEquals(true, genomeLocParser.contigIsInDictionary("chr1")); // should be in the reference
|
|
|
|
|
}
|
2009-06-22 22:39:41 +08:00
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testGetContigInfoKnownContig() {
|
2010-11-11 01:59:50 +08:00
|
|
|
assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
2010-11-02 05:31:44 +08:00
|
|
|
@Test(expectedExceptions=ReviewedStingException.class)
|
2009-06-22 22:39:41 +08:00
|
|
|
public void testParseBadString() {
|
2010-11-11 01:59:50 +08:00
|
|
|
genomeLocParser.parseGenomeLoc("Bad:0-1");
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
2012-11-28 00:00:33 +08:00
|
|
|
@Test
|
|
|
|
|
public void testContigHasColon() {
|
|
|
|
|
SAMFileHeader header = new SAMFileHeader();
|
|
|
|
|
header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
|
|
|
|
|
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
|
|
|
|
SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
|
|
|
|
|
rec.setSequenceLength(10);
|
|
|
|
|
dict.addSequence(rec);
|
|
|
|
|
header.setSequenceDictionary(dict);
|
|
|
|
|
|
|
|
|
|
final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
|
|
|
|
GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");
|
|
|
|
|
assertEquals(0, loc.getContigIndex());
|
|
|
|
|
assertEquals(loc.getStart(), 4);
|
|
|
|
|
assertEquals(loc.getStop(), 5);
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
@Test
|
|
|
|
|
public void testParseGoodString() {
|
2011-05-21 10:01:59 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(0, loc.getContigIndex());
|
2011-05-21 10:01:59 +08:00
|
|
|
assertEquals(loc.getStop(), 10);
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
2012-11-28 00:00:33 +08:00
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
@Test
|
|
|
|
|
public void testCreateGenomeLoc1() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(0, loc.getContigIndex());
|
|
|
|
|
assertEquals(loc.getStop(), 100);
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-22 07:32:24 +08:00
|
|
|
@Test
|
|
|
|
|
public void testCreateGenomeLoc1point5() { // in honor of VAAL!
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(0, loc.getContigIndex());
|
|
|
|
|
assertEquals(loc.getStop(), 1);
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-09-22 07:32:24 +08:00
|
|
|
}
|
|
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
@Test
|
|
|
|
|
public void testCreateGenomeLoc2() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
|
|
|
|
|
assertEquals("chr1", loc.getContig());
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getStop(), 100);
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testCreateGenomeLoc3() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1);
|
|
|
|
|
assertEquals("chr1", loc.getContig());
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getStop(), 1);
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testCreateGenomeLoc4() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1);
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(0, loc.getContigIndex());
|
|
|
|
|
assertEquals(loc.getStop(), 1);
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testCreateGenomeLoc5() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
|
|
|
|
|
GenomeLoc copy = genomeLocParser.createGenomeLoc(loc.getContig(),loc.getStart(),loc.getStop());
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(0, copy.getContigIndex());
|
|
|
|
|
assertEquals(copy.getStop(), 100);
|
|
|
|
|
assertEquals(copy.getStart(), 1);
|
2009-09-22 06:37:47 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-22 07:32:24 +08:00
|
|
|
@Test
|
|
|
|
|
public void testGenomeLocPlusSign() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1+");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getContigIndex(), 0);
|
|
|
|
|
assertEquals(loc.getStop(), 10); // the size
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-09-22 07:32:24 +08:00
|
|
|
}
|
|
|
|
|
|
2009-12-24 05:59:14 +08:00
|
|
|
@Test
|
2009-09-22 12:34:18 +08:00
|
|
|
public void testGenomeLocParseOnlyChrome() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getContigIndex(), 0);
|
|
|
|
|
assertEquals(loc.getStop(), 10); // the size
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-09-22 12:34:18 +08:00
|
|
|
}
|
2009-12-24 05:59:14 +08:00
|
|
|
|
2010-11-02 05:31:44 +08:00
|
|
|
@Test(expectedExceptions=ReviewedStingException.class)
|
2009-09-22 12:34:18 +08:00
|
|
|
public void testGenomeLocParseOnlyBadChrome() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr12");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getContigIndex(), 0);
|
|
|
|
|
assertEquals(loc.getStop(), 10); // the size
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-09-22 12:34:18 +08:00
|
|
|
}
|
2009-12-24 05:59:14 +08:00
|
|
|
|
2010-11-02 05:31:44 +08:00
|
|
|
@Test(expectedExceptions=ReviewedStingException.class)
|
2009-09-22 07:32:24 +08:00
|
|
|
public void testGenomeLocBad() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getContigIndex(), 0);
|
|
|
|
|
assertEquals(loc.getStop(), 10); // the size
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|
2009-12-24 05:59:14 +08:00
|
|
|
|
2011-05-21 10:01:59 +08:00
|
|
|
@Test(expectedExceptions=UserException.class)
|
2009-09-22 12:34:18 +08:00
|
|
|
public void testGenomeLocBad2() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-500-0");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getContigIndex(), 0);
|
|
|
|
|
assertEquals(loc.getStop(), 10); // the size
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-09-22 12:34:18 +08:00
|
|
|
}
|
2009-12-24 05:59:14 +08:00
|
|
|
|
2011-05-21 10:01:59 +08:00
|
|
|
@Test(expectedExceptions=UserException.class)
|
2009-09-22 12:34:18 +08:00
|
|
|
public void testGenomeLocBad3() {
|
2010-11-11 01:59:50 +08:00
|
|
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1--0");
|
2010-11-02 05:31:44 +08:00
|
|
|
assertEquals(loc.getContigIndex(), 0);
|
|
|
|
|
assertEquals(loc.getStop(), 10); // the size
|
|
|
|
|
assertEquals(loc.getStart(), 1);
|
2009-09-22 12:34:18 +08:00
|
|
|
}
|
2010-02-17 04:35:35 +08:00
|
|
|
|
|
|
|
|
// test out the validating methods
|
|
|
|
|
@Test
|
|
|
|
|
public void testValidationOfGenomeLocs() {
|
2011-05-21 10:01:59 +08:00
|
|
|
assertTrue(genomeLocParser.isValidGenomeLoc("chr1",1,1));
|
|
|
|
|
assertTrue(!genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry
|
|
|
|
|
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig
|
|
|
|
|
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start
|
|
|
|
|
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
|
2013-01-30 05:51:39 +08:00
|
|
|
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop
|
2011-05-21 10:01:59 +08:00
|
|
|
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
|
2013-01-30 05:51:39 +08:00
|
|
|
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end
|
|
|
|
|
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions = ReviewedStingException.class)
|
|
|
|
|
public void testValidateGenomeLoc() {
|
|
|
|
|
// bad contig index
|
|
|
|
|
genomeLocParser.validateGenomeLoc("chr1", 1, 1, 2, false);
|
2010-02-17 04:35:35 +08:00
|
|
|
}
|
2011-11-18 02:53:46 +08:00
|
|
|
|
|
|
|
|
private static class FlankingGenomeLocTestData extends TestDataProvider {
|
|
|
|
|
final GenomeLocParser parser;
|
|
|
|
|
final int basePairs;
|
|
|
|
|
final GenomeLoc original, flankStart, flankStop;
|
|
|
|
|
|
|
|
|
|
private FlankingGenomeLocTestData(String name, GenomeLocParser parser, int basePairs, String original, String flankStart, String flankStop) {
|
|
|
|
|
super(FlankingGenomeLocTestData.class, name);
|
|
|
|
|
this.parser = parser;
|
|
|
|
|
this.basePairs = basePairs;
|
|
|
|
|
this.original = parse(parser, original);
|
|
|
|
|
this.flankStart = flankStart == null ? null : parse(parser, flankStart);
|
|
|
|
|
this.flankStop = flankStop == null ? null : parse(parser, flankStop);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static GenomeLoc parse(GenomeLocParser parser, String str) {
|
|
|
|
|
return "unmapped".equals(str) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(str);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@DataProvider(name = "flankingGenomeLocs")
|
|
|
|
|
public Object[][] getFlankingGenomeLocs() {
|
|
|
|
|
int contigLength = 10000;
|
|
|
|
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigLength);
|
|
|
|
|
GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("atStartBase1", parser, 1,
|
|
|
|
|
"chr1:1", null, "chr1:2");
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("atStartBase50", parser, 50,
|
|
|
|
|
"chr1:1", null, "chr1:2-51");
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("atStartRange50", parser, 50,
|
|
|
|
|
"chr1:1-10", null, "chr1:11-60");
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("atEndBase1", parser, 1,
|
|
|
|
|
"chr1:" + contigLength, "chr1:" + (contigLength - 1), null);
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("atEndBase50", parser, 50,
|
|
|
|
|
"chr1:" + contigLength, String.format("chr1:%d-%d", contigLength - 50, contigLength - 1), null);
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("atEndRange50", parser, 50,
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 10, contigLength),
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 60, contigLength - 11),
|
|
|
|
|
null);
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("nearStartBase1", parser, 1,
|
|
|
|
|
"chr1:2", "chr1:1", "chr1:3");
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("nearStartRange50", parser, 50,
|
|
|
|
|
"chr1:21-30", "chr1:1-20", "chr1:31-80");
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("nearEndBase1", parser, 1,
|
|
|
|
|
"chr1:" + (contigLength - 1), "chr1:" + (contigLength - 2), "chr1:" + contigLength);
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("nearEndRange50", parser, 50,
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 30, contigLength - 21),
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 80, contigLength - 31),
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 20, contigLength));
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("beyondStartBase1", parser, 1,
|
|
|
|
|
"chr1:3", "chr1:2", "chr1:4");
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("beyondStartRange50", parser, 50,
|
|
|
|
|
"chr1:101-200", "chr1:51-100", "chr1:201-250");
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("beyondEndBase1", parser, 1,
|
|
|
|
|
"chr1:" + (contigLength - 3),
|
|
|
|
|
"chr1:" + (contigLength - 4),
|
|
|
|
|
"chr1:" + (contigLength - 2));
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("beyondEndRange50", parser, 50,
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 200, contigLength - 101),
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 250, contigLength - 201),
|
|
|
|
|
String.format("chr1:%d-%d", contigLength - 100, contigLength - 51));
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("unmapped", parser, 50,
|
|
|
|
|
"unmapped", null, null);
|
|
|
|
|
|
|
|
|
|
new FlankingGenomeLocTestData("fullContig", parser, 50,
|
|
|
|
|
"chr1", null, null);
|
|
|
|
|
|
|
|
|
|
return FlankingGenomeLocTestData.getTests(FlankingGenomeLocTestData.class);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(dataProvider = "flankingGenomeLocs")
|
|
|
|
|
public void testCreateGenomeLocAtStart(FlankingGenomeLocTestData data) {
|
|
|
|
|
GenomeLoc actual = data.parser.createGenomeLocAtStart(data.original, data.basePairs);
|
|
|
|
|
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
|
|
|
|
|
data.toString(), data.original, actual, data.flankStart);
|
|
|
|
|
assertEquals(actual, data.flankStart, description);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(dataProvider = "flankingGenomeLocs")
|
|
|
|
|
public void testCreateGenomeLocAtStop(FlankingGenomeLocTestData data) {
|
|
|
|
|
GenomeLoc actual = data.parser.createGenomeLocAtStop(data.original, data.basePairs);
|
|
|
|
|
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
|
|
|
|
|
data.toString(), data.original, actual, data.flankStop);
|
|
|
|
|
assertEquals(actual, data.flankStop, description);
|
|
|
|
|
}
|
2013-01-30 05:51:39 +08:00
|
|
|
|
|
|
|
|
@DataProvider(name = "parseGenomeLoc")
|
|
|
|
|
public Object[][] makeParsingTest() {
|
|
|
|
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
|
|
|
|
|
|
|
|
|
tests.add(new Object[]{ "chr1:10", "chr1", 10 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:100", "chr1", 100 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:1000", "chr1", 1000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:1,000", "chr1", 1000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:10000", "chr1", 10000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:10,000", "chr1", 10000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:100000", "chr1", 100000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:100,000", "chr1", 100000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:1000000", "chr1", 1000000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:1,000,000", "chr1", 1000000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:1000,000", "chr1", 1000000 });
|
|
|
|
|
tests.add(new Object[]{ "chr1:1,000000", "chr1", 1000000 });
|
|
|
|
|
|
|
|
|
|
return tests.toArray(new Object[][]{});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test( dataProvider = "parseGenomeLoc")
|
|
|
|
|
public void testParsingPositions(final String string, final String contig, final int start) {
|
|
|
|
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000000);
|
|
|
|
|
GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(string);
|
|
|
|
|
Assert.assertEquals(loc.getContig(), contig);
|
|
|
|
|
Assert.assertEquals(loc.getStart(), start);
|
|
|
|
|
Assert.assertEquals(loc.getStop(), start);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test( )
|
|
|
|
|
public void testCreationFromSAMRecord() {
|
|
|
|
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
|
|
|
|
Assert.assertEquals(loc.getContig(), read.getReferenceName());
|
|
|
|
|
Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
|
|
|
|
|
Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
|
|
|
|
|
Assert.assertEquals(loc.getStop(), read.getAlignmentEnd());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test( )
|
|
|
|
|
public void testCreationFromSAMRecordUnmapped() {
|
|
|
|
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
|
|
|
|
read.setReadUnmappedFlag(true);
|
|
|
|
|
read.setReferenceIndex(-1);
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
|
|
|
|
Assert.assertTrue(loc.isUnmapped());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test( )
|
|
|
|
|
public void testCreationFromSAMRecordUnmappedButOnGenome() {
|
|
|
|
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
|
|
|
|
read.setReadUnmappedFlag(true);
|
|
|
|
|
read.setCigarString("*");
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
|
|
|
|
Assert.assertEquals(loc.getContig(), read.getReferenceName());
|
|
|
|
|
Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
|
|
|
|
|
Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
|
|
|
|
|
Assert.assertEquals(loc.getStop(), read.getAlignmentStart());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testCreationFromFeature() {
|
|
|
|
|
final Feature feature = new BasicFeature("chr1", 1, 5);
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
|
|
|
|
|
Assert.assertEquals(loc.getContig(), feature.getChr());
|
|
|
|
|
Assert.assertEquals(loc.getStart(), feature.getStart());
|
|
|
|
|
Assert.assertEquals(loc.getStop(), feature.getEnd());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testCreationFromVariantContext() {
|
|
|
|
|
final VariantContext feature = new VariantContextBuilder("x", "chr1", 1, 5, Arrays.asList(Allele.create("AAAAA", true))).make();
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
|
|
|
|
|
Assert.assertEquals(loc.getContig(), feature.getChr());
|
|
|
|
|
Assert.assertEquals(loc.getStart(), feature.getStart());
|
|
|
|
|
Assert.assertEquals(loc.getStop(), feature.getEnd());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testcreateGenomeLocOnContig() throws FileNotFoundException {
|
|
|
|
|
final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
|
|
|
|
final SAMSequenceDictionary dict = seq.getSequenceDictionary();
|
|
|
|
|
final GenomeLocParser genomeLocParser = new GenomeLocParser(dict);
|
|
|
|
|
|
|
|
|
|
for ( final SAMSequenceRecord rec : dict.getSequences() ) {
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.createOverEntireContig(rec.getSequenceName());
|
|
|
|
|
Assert.assertEquals(loc.getContig(), rec.getSequenceName());
|
|
|
|
|
Assert.assertEquals(loc.getStart(), 1);
|
|
|
|
|
Assert.assertEquals(loc.getStop(), rec.getSequenceLength());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@DataProvider(name = "GenomeLocOnContig")
|
|
|
|
|
public Object[][] makeGenomeLocOnContig() {
|
|
|
|
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
|
|
|
|
|
|
|
|
|
final int contigLength = header.getSequence(0).getSequenceLength();
|
|
|
|
|
for ( int start = -10; start < contigLength + 10; start++ ) {
|
|
|
|
|
for ( final int len : Arrays.asList(1, 10, 20) ) {
|
|
|
|
|
tests.add(new Object[]{ "chr1", start, start + len });
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return tests.toArray(new Object[][]{});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test( dataProvider = "GenomeLocOnContig")
|
|
|
|
|
public void testGenomeLocOnContig(final String contig, final int start, final int stop) {
|
|
|
|
|
final int contigLength = header.getSequence(0).getSequenceLength();
|
|
|
|
|
final GenomeLoc loc = genomeLocParser.createGenomeLocOnContig(contig, start, stop);
|
|
|
|
|
|
|
|
|
|
if ( stop < 1 || start > contigLength )
|
|
|
|
|
Assert.assertNull(loc, "GenomeLoc should be null if the start/stops are not meaningful");
|
|
|
|
|
else {
|
|
|
|
|
Assert.assertNotNull(loc);
|
|
|
|
|
Assert.assertEquals(loc.getContig(), contig);
|
|
|
|
|
Assert.assertEquals(loc.getStart(), Math.max(start, 1));
|
|
|
|
|
Assert.assertEquals(loc.getStop(), Math.min(stop, contigLength));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@DataProvider(name = "GenomeLocPadding")
|
|
|
|
|
public Object[][] makeGenomeLocPadding() {
|
|
|
|
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
|
|
|
|
|
|
|
|
|
final int contigLength = header.getSequence(0).getSequenceLength();
|
|
|
|
|
for ( int pad = 0; pad < contigLength + 1; pad++) {
|
|
|
|
|
for ( int start = 1; start < contigLength; start++ ) {
|
|
|
|
|
for ( int stop = start; stop < contigLength; stop++ ) {
|
|
|
|
|
tests.add(new Object[]{ genomeLocParser.createGenomeLoc("chr1", start, stop), pad});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return tests.toArray(new Object[][]{});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test( dataProvider = "GenomeLocPadding")
|
|
|
|
|
public void testGenomeLocPadding(final GenomeLoc input, final int pad) {
|
|
|
|
|
final int contigLength = header.getSequence(0).getSequenceLength();
|
|
|
|
|
final GenomeLoc padded = genomeLocParser.createPaddedGenomeLoc(input, pad);
|
|
|
|
|
|
|
|
|
|
Assert.assertNotNull(padded);
|
|
|
|
|
Assert.assertEquals(padded.getContig(), input.getContig());
|
|
|
|
|
Assert.assertEquals(padded.getStart(), Math.max(input.getStart() - pad, 1));
|
|
|
|
|
Assert.assertEquals(padded.getStop(), Math.min(input.getStop() + pad, contigLength));
|
|
|
|
|
}
|
2009-06-22 22:39:41 +08:00
|
|
|
}
|