// gatk-3.8/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
//
// 510 lines
// 22 KiB
// Java
// Raw Normal View History

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broad.tribble.BasicFeature;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
/**
* @author aaron
* <p/>
* Class GenomeLocParserUnitTest
* <p/>
* Test out the functionality of the new genome loc parser
*/
public class GenomeLocParserUnitTest extends BaseTest {
// Parser under test; assigned in init() from the artificial header's sequence dictionary.
private GenomeLocParser genomeLocParser;
// Artificial SAM header assigned in init(); also used to create reads and to look up the contig length.
private SAMFileHeader header;
@BeforeClass
Adding Graph-based likelihood ratio calculation to HC To active this feature add '--likelihoodCalculationEngine GraphBased' to the HC command line. New HC Options (both Advanced and Hidden): ========================================== --likelihoodCalculationEngine PairHMM/GraphBased/Random (default PairHMM) Specifies what engine should be used to generate read vs haplotype likelihoods. PairHMM : standard full-PairHMM approach. GraphBased : using the assembly graph to accelarate the process. Random : generate random likelihoods - used for benchmarking purposes only. --heterogeneousKmerSizeResolution COMBO_MIN/COMBO_MAX/MAX_ONLY/MIN_ONLY (default COMBO_MIN) It idicates how to merge haplotypes produced using different kmerSizes. Only has effect when used in combination with (--likelihooCalculationEngine GraphBased) COMBO_MIN : use the smallest kmerSize with all haplotypes. COMBO_MAX : use the larger kmerSize with all haplotypes. MIN_ONLY : use the smallest kmerSize with haplotypes assembled using it. MAX_ONLY : use the larger kmerSize with haplotypes asembled using it. Major code changes: =================== * Introduce multiple likelihood calculation engines (before there was just one). * Assembly results from different kmerSies are now packed together using the AssemblyResultSet class. * Added yet another PairHMM implementation with a different API in order to spport local PairHMM calculations, (e.g. a segment of the read vs a segment of the haplotype). Major components: ================ * FastLoglessPairHMM: New pair-hmm implemtation using some heuristic to speed up partial PairHMM calculations * GraphBasedLikelihoodCalculationEngine: delegates onto GraphBasedLikelihoodCalculationEngineInstance the exectution of the graph-based likelihood approach. * GraphBasedLikelihoodCalculationEngineInstance: one instance per active-region, implements the graph traversals to calcualte the likelihoods using the graph as an scafold. 
* HaplotypeGraph: haplotype threading graph where build from the assembly haplotypes. This structure is the one used by GraphBasedLikelihoodCalculationEngineInstance to do its work. * ReadAnchoring and KmerSequenceGraphMap: contain information as how a read map on the HaplotypeGraph that is used by GraphBasedLikelihoodCalcuationEngineInstance to do its work. Remove mergeCommonChains from HaplotypeGraph creation Fixed bamboo issues with HaplotypeGraphUnitTest Fixed probrems with HaplotypeCallerIntegrationTest Fixed issue with GraphLikelihoodVsLoglessAccuracyIntegrationTest Fixed ReadThreadingLikelihoodCalculationEngine issues Moved event-block iteration outside GraphBased*EngineInstance Removed unecessary parameter from ReadAnchoring constructor. Fixed test problem Added a bit more documentation to EventBlockSearchEngine Fixing some private - protected dependency issues Further refactoring making GraphBased*Instance and HaplotypeGraph slimmer. Addressed last pull request commit comments Fixed FastLoglessPairHMM public -> protected dependency Fixed probrem with HaplotypeGraph unit test Adding Graph-based likelihood ratio calculation to HC To active this feature add '--likelihoodCalculationEngine GraphBased' to the HC command line. New HC Options (both Advanced and Hidden): ========================================== --likelihoodCalculationEngine PairHMM/GraphBased/Random (default PairHMM) Specifies what engine should be used to generate read vs haplotype likelihoods. PairHMM : standard full-PairHMM approach. GraphBased : using the assembly graph to accelarate the process. Random : generate random likelihoods - used for benchmarking purposes only. --heterogeneousKmerSizeResolution COMBO_MIN/COMBO_MAX/MAX_ONLY/MIN_ONLY (default COMBO_MIN) It idicates how to merge haplotypes produced using different kmerSizes. Only has effect when used in combination with (--likelihooCalculationEngine GraphBased) COMBO_MIN : use the smallest kmerSize with all haplotypes. 
COMBO_MAX : use the larger kmerSize with all haplotypes. MIN_ONLY : use the smallest kmerSize with haplotypes assembled using it. MAX_ONLY : use the larger kmerSize with haplotypes asembled using it. Major code changes: =================== * Introduce multiple likelihood calculation engines (before there was just one). * Assembly results from different kmerSies are now packed together using the AssemblyResultSet class. * Added yet another PairHMM implementation with a different API in order to spport local PairHMM calculations, (e.g. a segment of the read vs a segment of the haplotype). Major components: ================ * FastLoglessPairHMM: New pair-hmm implemtation using some heuristic to speed up partial PairHMM calculations * GraphBasedLikelihoodCalculationEngine: delegates onto GraphBasedLikelihoodCalculationEngineInstance the exectution of the graph-based likelihood approach. * GraphBasedLikelihoodCalculationEngineInstance: one instance per active-region, implements the graph traversals to calcualte the likelihoods using the graph as an scafold. * HaplotypeGraph: haplotype threading graph where build from the assembly haplotypes. This structure is the one used by GraphBasedLikelihoodCalculationEngineInstance to do its work. * ReadAnchoring and KmerSequenceGraphMap: contain information as how a read map on the HaplotypeGraph that is used by GraphBasedLikelihoodCalcuationEngineInstance to do its work. Remove mergeCommonChains from HaplotypeGraph creation Fixed bamboo issues with HaplotypeGraphUnitTest Fixed probrems with HaplotypeCallerIntegrationTest Fixed issue with GraphLikelihoodVsLoglessAccuracyIntegrationTest Fixed ReadThreadingLikelihoodCalculationEngine issues Moved event-block iteration outside GraphBased*EngineInstance Removed unecessary parameter from ReadAnchoring constructor. 
Fixed test problem Added a bit more documentation to EventBlockSearchEngine Fixing some private - protected dependency issues Further refactoring making GraphBased*Instance and HaplotypeGraph slimmer. Addressed last pull request commit comments Fixed FastLoglessPairHMM public -> protected dependency Fixed probrem with HaplotypeGraph unit test
2013-11-19 01:07:59 +08:00
public void init() {
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
/**
 * Asking for the index of an unknown contig is expected to raise
 * {@link UserException.MalformedGenomeLoc}.
 */
@Test(expectedExceptions = UserException.MalformedGenomeLoc.class)
public void testGetContigIndex() {
    // "blah" should not be in the reference
    final int missingIndex = genomeLocParser.getContigIndex("blah");
    assertEquals(missingIndex, -1);
}
/**
 * The fixture contig "chr1" must resolve to index 0.
 */
@Test
public void testGetContigIndexValid() {
    // "chr1" should be in the reference
    final int chr1Index = genomeLocParser.getContigIndex("chr1");
    assertEquals(chr1Index, 0);
}
/**
 * Requesting contig info for a name that is not in the reference is expected
 * to raise a {@link UserException}.
 */
@Test(expectedExceptions = UserException.class)
public void testGetContigInfoUnknownContig1() {
    // "blah" should *not* be in the reference; the lookup itself is expected to throw.
    // assertNull avoids the expected/actual mix-up of assertEquals(null, ...) under TestNG.
    Assert.assertNull(genomeLocParser.getContigInfo("blah"));
}
/**
 * Requesting contig info for a {@code null} name is expected to raise a
 * {@link UserException}.
 */
@Test(expectedExceptions = UserException.class)
public void testGetContigInfoUnknownContig2() {
    // null should *not* be in the reference; the lookup itself is expected to throw.
    // assertNull avoids the expected/actual mix-up of assertEquals(null, ...) under TestNG.
    Assert.assertNull(genomeLocParser.getContigInfo(null));
}
/**
 * An unknown contig name must be reported as absent from the dictionary.
 */
@Test
public void testHasContigInfoUnknownContig1() {
    // "blah" should *not* be in the reference
    Assert.assertFalse(genomeLocParser.contigIsInDictionary("blah"));
}
/**
 * A {@code null} contig name must be reported as absent from the dictionary.
 */
@Test
public void testHasContigInfoUnknownContig2() {
    // null should *not* be in the reference
    Assert.assertFalse(genomeLocParser.contigIsInDictionary(null));
}
/**
 * The fixture contig "chr1" must be reported as present in the dictionary.
 */
@Test
public void testHasContigInfoKnownContig() {
    // "chr1" should be in the reference
    assertTrue(genomeLocParser.contigIsInDictionary("chr1"));
}
/**
 * The contig info returned for "chr1" must carry the sequence name "chr1".
 */
@Test
public void testGetContigInfoKnownContig() {
    // "chr1" should be in the reference; asserting on the name itself (actual, expected)
    // reports the offending name on failure instead of a bare compareTo() result.
    assertEquals(genomeLocParser.getContigInfo("chr1").getSequenceName(), "chr1");
}
/**
 * Parsing the interval string "Bad:0-1" is expected to raise a
 * {@link ReviewedStingException}.
 */
@Test(expectedExceptions = ReviewedStingException.class)
public void testParseBadString() {
    final String badInterval = "Bad:0-1";
    genomeLocParser.parseGenomeLoc(badInterval);
}
/**
 * A contig whose name itself contains colons ("c:h:r1") must still parse:
 * "c:h:r1:4-5" is expected to yield contig index 0, start 4 and stop 5.
 */
@Test
public void testContigHasColon() {
    // Dedicated header for this test; named so that it does not shadow the shared 'header' field.
    final SAMFileHeader colonHeader = new SAMFileHeader();
    colonHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);

    final SAMSequenceDictionary dict = new SAMSequenceDictionary();
    final SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
    rec.setSequenceLength(10);
    dict.addSequence(rec);
    colonHeader.setSequenceDictionary(dict);

    final GenomeLocParser myGenomeLocParser = new GenomeLocParser(colonHeader.getSequenceDictionary());
    final GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");

    // TestNG order is (actual, expected)
    assertEquals(loc.getContigIndex(), 0);
    assertEquals(loc.getStart(), 4);
    assertEquals(loc.getStop(), 5);
}
/**
 * "chr1:1-10" must parse to contig index 0 spanning positions 1 through 10.
 */
@Test
public void testParseGoodString() {
    final GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
    // TestNG order is (actual, expected)
    assertEquals(loc.getContigIndex(), 0);
    assertEquals(loc.getStop(), 10);
    assertEquals(loc.getStart(), 1);
}
/**
 * Creating a location from (contig, start, stop) must preserve the coordinates
 * and resolve "chr1" to contig index 0.
 */
@Test
public void testCreateGenomeLoc1() {
    final GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
    // TestNG order is (actual, expected)
    assertEquals(loc.getContigIndex(), 0);
    assertEquals(loc.getStop(), 100);
    assertEquals(loc.getStart(), 1);
}
/**
 * A single-position string ("chr1:1") must parse to a one-base location
 * whose start and stop are both 1.
 */
@Test
public void testCreateGenomeLoc1point5() { // in honor of VAAL!
    final GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1");
    // TestNG order is (actual, expected)
    assertEquals(loc.getContigIndex(), 0);
    assertEquals(loc.getStop(), 1);
    assertEquals(loc.getStart(), 1);
}
/**
 * Creating a location from (contig, start, stop) must preserve the contig
 * name along with the coordinates.
 */
@Test
public void testCreateGenomeLoc2() {
    final GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
    // TestNG order is (actual, expected)
    assertEquals(loc.getContig(), "chr1");
    assertEquals(loc.getStop(), 100);
    assertEquals(loc.getStart(), 1);
}
/**
 * Creating a location from (contig, position) must yield a one-base location
 * on the named contig.
 */
@Test
public void testCreateGenomeLoc3() {
    final GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1);
    // TestNG order is (actual, expected)
    assertEquals(loc.getContig(), "chr1");
    assertEquals(loc.getStop(), 1);
    assertEquals(loc.getStart(), 1);
}
/**
 * Creating a location from (contig, position) must yield a one-base location
 * whose contig index is 0 for "chr1".
 */
@Test
public void testCreateGenomeLoc4() {
    final GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1);
    // TestNG order is (actual, expected)
    assertEquals(loc.getContigIndex(), 0);
    assertEquals(loc.getStop(), 1);
    assertEquals(loc.getStart(), 1);
}
/**
 * A location rebuilt from another location's contig, start and stop must
 * carry the same coordinates as the one it was copied from.
 */
@Test
public void testCreateGenomeLoc5() {
    final GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
    final GenomeLoc copy = genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop());
    // TestNG order is (actual, expected)
    assertEquals(copy.getContigIndex(), 0);
    assertEquals(copy.getStop(), 100);
    assertEquals(copy.getStart(), 1);
}
/**
 * The open-ended form "chr1:1+" must run from position 1 to the end of the
 * contig (10 in this fixture).
 */
@Test
public void testGenomeLocPlusSign() {
    final GenomeLoc openEnded = genomeLocParser.parseGenomeLoc("chr1:1+");
    assertEquals(openEnded.getContigIndex(), 0);
    assertEquals(openEnded.getStart(), 1);
    assertEquals(openEnded.getStop(), 10); // the size
}
/**
 * A bare contig name ("chr1") must parse to a location covering the whole
 * contig, i.e. 1 through 10 in this fixture.
 */
@Test
public void testGenomeLocParseOnlyChrome() {
    final GenomeLoc wholeContig = genomeLocParser.parseGenomeLoc("chr1");
    assertEquals(wholeContig.getContigIndex(), 0);
    assertEquals(wholeContig.getStart(), 1);
    assertEquals(wholeContig.getStop(), 10); // the size
}
/**
 * Parsing the bare contig name "chr12" is expected to raise a
 * {@link ReviewedStingException}.
 */
@Test(expectedExceptions = ReviewedStingException.class)
public void testGenomeLocParseOnlyBadChrome() {
    final GenomeLoc parsed = genomeLocParser.parseGenomeLoc("chr12");
    // Only reached if the parse unexpectedly succeeds.
    assertEquals(parsed.getContigIndex(), 0);
    assertEquals(parsed.getStart(), 1);
    assertEquals(parsed.getStop(), 10); // the size
}
/**
 * A range with a dangling dash ("chr1:1-") is expected to raise a
 * {@link ReviewedStingException}.
 */
@Test(expectedExceptions = ReviewedStingException.class)
public void testGenomeLocBad() {
    final GenomeLoc parsed = genomeLocParser.parseGenomeLoc("chr1:1-");
    // Only reached if the parse unexpectedly succeeds.
    assertEquals(parsed.getContigIndex(), 0);
    assertEquals(parsed.getStart(), 1);
    assertEquals(parsed.getStop(), 10); // the size
}
/**
 * A range with an extra component ("chr1:1-500-0") is expected to raise a
 * {@link UserException}.
 */
@Test(expectedExceptions = UserException.class)
public void testGenomeLocBad2() {
    final GenomeLoc parsed = genomeLocParser.parseGenomeLoc("chr1:1-500-0");
    // Only reached if the parse unexpectedly succeeds.
    assertEquals(parsed.getContigIndex(), 0);
    assertEquals(parsed.getStart(), 1);
    assertEquals(parsed.getStop(), 10); // the size
}
/**
 * A range with a doubled dash ("chr1:1--0") is expected to raise a
 * {@link UserException}.
 */
@Test(expectedExceptions = UserException.class)
public void testGenomeLocBad3() {
    final GenomeLoc parsed = genomeLocParser.parseGenomeLoc("chr1:1--0");
    // Only reached if the parse unexpectedly succeeds.
    assertEquals(parsed.getContigIndex(), 0);
    assertEquals(parsed.getStart(), 1);
    assertEquals(parsed.getStop(), 10); // the size
}
// test out the validating methods
/**
 * Exercises {@code isValidGenomeLoc} against the fixture (contig "chr1", length 10)
 * with in-range, out-of-range and inverted coordinates. The four-argument calls pass
 * {@code false} as the last argument, which presumably relaxes the on-reference
 * bounds check (confirm against GenomeLocParser).
 */
@Test
public void testValidationOfGenomeLocs() {
    assertTrue(genomeLocParser.isValidGenomeLoc("chr1", 1, 1));
    Assert.assertFalse(genomeLocParser.isValidGenomeLoc("chr2", 1, 1));        // shouldn't have an entry
    Assert.assertFalse(genomeLocParser.isValidGenomeLoc("chr1", 1, 11));       // stop past the end of the contig
    Assert.assertFalse(genomeLocParser.isValidGenomeLoc("chr1", -1, 10));      // bad start
    Assert.assertFalse(genomeLocParser.isValidGenomeLoc("chr1", 1, -2));       // bad stop
    assertTrue(genomeLocParser.isValidGenomeLoc("chr1", -1, 2, false));        // bad start, accepted when the last argument is false
    Assert.assertFalse(genomeLocParser.isValidGenomeLoc("chr1", 10, 11));      // stop past the end of the contig
    assertTrue(genomeLocParser.isValidGenomeLoc("chr1", 10, 11, false));       // stop past the end, accepted when the last argument is false
    Assert.assertFalse(genomeLocParser.isValidGenomeLoc("chr1", 2, 1));        // stop < start
}
/**
 * {@code validateGenomeLoc} is expected to raise a {@link ReviewedStingException}
 * for the arguments below.
 */
@Test(expectedExceptions = ReviewedStingException.class)
public void testValidateGenomeLoc() {
    final String contig = "chr1";
    // bad contig index
    genomeLocParser.validateGenomeLoc(contig, 1, 1, 2, false);
}
/**
 * One flanking-interval scenario: an original location, the number of flanking
 * base pairs, and the flanks expected before its start and after its stop
 * ({@code null} when no flank is expected). Each instance is registered under
 * this class with the {@code TestDataProvider} superclass.
 */
private static class FlankingGenomeLocTestData extends TestDataProvider {
    final GenomeLocParser parser;
    final int basePairs;
    final GenomeLoc original, flankStart, flankStop;

    private FlankingGenomeLocTestData(String name, GenomeLocParser parser, int basePairs, String original, String flankStart, String flankStop) {
        super(FlankingGenomeLocTestData.class, name);
        this.parser = parser;
        this.basePairs = basePairs;
        this.original = parse(parser, original);

        // A null flank string means "no flank expected" and is kept as null.
        if (flankStart == null) {
            this.flankStart = null;
        } else {
            this.flankStart = parse(parser, flankStart);
        }
        if (flankStop == null) {
            this.flankStop = null;
        } else {
            this.flankStop = parse(parser, flankStop);
        }
    }

    /** Maps the literal "unmapped" to GenomeLoc.UNMAPPED; any other string is handed to the parser. */
    private static GenomeLoc parse(GenomeLocParser parser, String str) {
        if ("unmapped".equals(str)) {
            return GenomeLoc.UNMAPPED;
        }
        return parser.parseGenomeLoc(str);
    }
}
/**
 * Supplies the flanking scenarios used by the createGenomeLocAtStart/AtStop tests.
 * Each {@code FlankingGenomeLocTestData} is constructed for its registration side
 * effect and collected at the end through {@code getTests}.
 *
 * Interval strings are built by plain concatenation rather than
 * {@code String.format("%d")}, so the digits never depend on the JVM's default locale.
 */
@DataProvider(name = "flankingGenomeLocs")
public Object[][] getFlankingGenomeLocs() {
    final int contigLength = 10000;
    // Named so that it does not shadow the shared 'header' field.
    final SAMFileHeader flankHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigLength);
    final GenomeLocParser parser = new GenomeLocParser(flankHeader.getSequenceDictionary());

    // Original location touching the first base of the contig: no flank before the start.
    new FlankingGenomeLocTestData("atStartBase1", parser, 1,
            "chr1:1", null, "chr1:2");
    new FlankingGenomeLocTestData("atStartBase50", parser, 50,
            "chr1:1", null, "chr1:2-51");
    new FlankingGenomeLocTestData("atStartRange50", parser, 50,
            "chr1:1-10", null, "chr1:11-60");

    // Original location touching the last base of the contig: no flank after the stop.
    new FlankingGenomeLocTestData("atEndBase1", parser, 1,
            "chr1:" + contigLength, "chr1:" + (contigLength - 1), null);
    new FlankingGenomeLocTestData("atEndBase50", parser, 50,
            "chr1:" + contigLength,
            "chr1:" + (contigLength - 50) + "-" + (contigLength - 1),
            null);
    new FlankingGenomeLocTestData("atEndRange50", parser, 50,
            "chr1:" + (contigLength - 10) + "-" + contigLength,
            "chr1:" + (contigLength - 60) + "-" + (contigLength - 11),
            null);

    // Original location close enough to a contig edge that the expected flank is truncated.
    new FlankingGenomeLocTestData("nearStartBase1", parser, 1,
            "chr1:2", "chr1:1", "chr1:3");
    new FlankingGenomeLocTestData("nearStartRange50", parser, 50,
            "chr1:21-30", "chr1:1-20", "chr1:31-80");
    new FlankingGenomeLocTestData("nearEndBase1", parser, 1,
            "chr1:" + (contigLength - 1), "chr1:" + (contigLength - 2), "chr1:" + contigLength);
    new FlankingGenomeLocTestData("nearEndRange50", parser, 50,
            "chr1:" + (contigLength - 30) + "-" + (contigLength - 21),
            "chr1:" + (contigLength - 80) + "-" + (contigLength - 31),
            "chr1:" + (contigLength - 20) + "-" + contigLength);

    // Original location far enough from both edges that full-size flanks are expected.
    new FlankingGenomeLocTestData("beyondStartBase1", parser, 1,
            "chr1:3", "chr1:2", "chr1:4");
    new FlankingGenomeLocTestData("beyondStartRange50", parser, 50,
            "chr1:101-200", "chr1:51-100", "chr1:201-250");
    new FlankingGenomeLocTestData("beyondEndBase1", parser, 1,
            "chr1:" + (contigLength - 3),
            "chr1:" + (contigLength - 4),
            "chr1:" + (contigLength - 2));
    new FlankingGenomeLocTestData("beyondEndRange50", parser, 50,
            "chr1:" + (contigLength - 200) + "-" + (contigLength - 101),
            "chr1:" + (contigLength - 250) + "-" + (contigLength - 201),
            "chr1:" + (contigLength - 100) + "-" + (contigLength - 51));

    // Degenerate inputs: no flank is expected on either side.
    new FlankingGenomeLocTestData("unmapped", parser, 50,
            "unmapped", null, null);
    new FlankingGenomeLocTestData("fullContig", parser, 50,
            "chr1", null, null);

    return FlankingGenomeLocTestData.getTests(FlankingGenomeLocTestData.class);
}
/**
 * For each flanking scenario, the location created before the original's start
 * must equal the scenario's expected start flank (possibly {@code null}).
 */
@Test(dataProvider = "flankingGenomeLocs")
public void testCreateGenomeLocAtStart(FlankingGenomeLocTestData data) {
    final GenomeLoc expectedFlank = data.flankStart;
    final GenomeLoc computedFlank = data.parser.createGenomeLocAtStart(data.original, data.basePairs);
    final String failureDetails = String.format(
            "%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
            data.toString(), data.original, computedFlank, expectedFlank);
    assertEquals(computedFlank, expectedFlank, failureDetails);
}
/**
 * For each flanking scenario, the location created after the original's stop
 * must equal the scenario's expected stop flank (possibly {@code null}).
 */
@Test(dataProvider = "flankingGenomeLocs")
public void testCreateGenomeLocAtStop(FlankingGenomeLocTestData data) {
    final GenomeLoc expectedFlank = data.flankStop;
    final GenomeLoc computedFlank = data.parser.createGenomeLocAtStop(data.original, data.basePairs);
    final String failureDetails = String.format(
            "%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
            data.toString(), data.original, computedFlank, expectedFlank);
    assertEquals(computedFlank, expectedFlank, failureDetails);
}
/**
 * Rows of (interval string, expected contig, expected position). The strings
 * cover plain digits as well as positions written with comma separators,
 * including irregularly placed ones.
 */
@DataProvider(name = "parseGenomeLoc")
public Object[][] makeParsingTest() {
    return new Object[][]{
            { "chr1:10", "chr1", 10 },
            { "chr1:100", "chr1", 100 },
            { "chr1:1000", "chr1", 1000 },
            { "chr1:1,000", "chr1", 1000 },
            { "chr1:10000", "chr1", 10000 },
            { "chr1:10,000", "chr1", 10000 },
            { "chr1:100000", "chr1", 100000 },
            { "chr1:100,000", "chr1", 100000 },
            { "chr1:1000000", "chr1", 1000000 },
            { "chr1:1,000,000", "chr1", 1000000 },
            { "chr1:1000,000", "chr1", 1000000 },
            { "chr1:1,000000", "chr1", 1000000 },
    };
}
/**
 * Parses a single-position interval string against a 10,000,000-base artificial
 * contig and checks the contig name and that start and stop both equal the
 * expected position.
 */
@Test(dataProvider = "parseGenomeLoc")
public void testParsingPositions(final String string, final String contig, final int start) {
    // A larger header than the class fixture, so every position in the data set is on the contig.
    final SAMFileHeader largeHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000000);
    final GenomeLocParser largeParser = new GenomeLocParser(largeHeader.getSequenceDictionary());

    final GenomeLoc parsed = largeParser.parseGenomeLoc(string);

    assertEquals(parsed.getContig(), contig);
    assertEquals(parsed.getStart(), start);
    assertEquals(parsed.getStop(), start);
}
/**
 * A location created from an artificial read must mirror the read's reference
 * name, reference index, alignment start and alignment end.
 */
@Test
public void testCreationFromSAMRecord() {
    final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
    final GenomeLoc fromRead = genomeLocParser.createGenomeLoc(read);

    final int expectedContigIndex = (int) read.getReferenceIndex();
    assertEquals(fromRead.getContig(), read.getReferenceName());
    assertEquals(fromRead.getContigIndex(), expectedContigIndex);
    assertEquals(fromRead.getStart(), read.getAlignmentStart());
    assertEquals(fromRead.getStop(), read.getAlignmentEnd());
}
/**
 * A read flagged unmapped and given reference index -1 must produce an
 * unmapped location.
 */
@Test
public void testCreationFromSAMRecordUnmapped() {
    final GATKSAMRecord unmappedRead = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
    unmappedRead.setReadUnmappedFlag(true);
    unmappedRead.setReferenceIndex(-1);

    final GenomeLoc fromRead = genomeLocParser.createGenomeLoc(unmappedRead);
    assertTrue(fromRead.isUnmapped());
}
/**
 * A read flagged unmapped but still carrying a reference index, with cigar "*",
 * must produce a location on that contig whose start and stop both equal the
 * read's alignment start.
 */
@Test
public void testCreationFromSAMRecordUnmappedButOnGenome() {
    final GATKSAMRecord placedRead = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
    placedRead.setReadUnmappedFlag(true);
    placedRead.setCigarString("*");

    final GenomeLoc fromRead = genomeLocParser.createGenomeLoc(placedRead);

    final int expectedContigIndex = (int) placedRead.getReferenceIndex();
    assertEquals(fromRead.getContig(), placedRead.getReferenceName());
    assertEquals(fromRead.getContigIndex(), expectedContigIndex);
    assertEquals(fromRead.getStart(), placedRead.getAlignmentStart());
    // Stop is compared with the alignment start, not the alignment end.
    assertEquals(fromRead.getStop(), placedRead.getAlignmentStart());
}
/**
 * A location created from a Tribble feature must mirror the feature's
 * chromosome, start and end.
 */
@Test
public void testCreationFromFeature() {
    final Feature source = new BasicFeature("chr1", 1, 5);
    final GenomeLoc fromFeature = genomeLocParser.createGenomeLoc(source);

    assertEquals(fromFeature.getContig(), source.getChr());
    assertEquals(fromFeature.getStart(), source.getStart());
    assertEquals(fromFeature.getStop(), source.getEnd());
}
/**
 * A location created from a variant context must mirror the context's
 * chromosome, start and end.
 */
@Test
public void testCreationFromVariantContext() {
    final List<Allele> alleles = Arrays.asList(Allele.create("AAAAA", true));
    final VariantContext source = new VariantContextBuilder("x", "chr1", 1, 5, alleles).make();
    final GenomeLoc fromVariant = genomeLocParser.createGenomeLoc(source);

    assertEquals(fromVariant.getContig(), source.getChr());
    assertEquals(fromVariant.getStart(), source.getStart());
    assertEquals(fromVariant.getStop(), source.getEnd());
}
/**
 * For every sequence in the b37 reference dictionary, createOverEntireContig
 * must return a location named after the sequence that runs from 1 to the
 * sequence length.
 */
@Test
public void testcreateGenomeLocOnContig() throws FileNotFoundException {
    final File referenceFile = new File(b37KGReference);
    final CachingIndexedFastaSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
    final SAMSequenceDictionary referenceDict = reference.getSequenceDictionary();
    final GenomeLocParser referenceParser = new GenomeLocParser(referenceDict);

    for (final SAMSequenceRecord sequence : referenceDict.getSequences()) {
        final String name = sequence.getSequenceName();
        final GenomeLoc wholeContig = referenceParser.createOverEntireContig(name);
        assertEquals(wholeContig.getContig(), sequence.getSequenceName());
        assertEquals(wholeContig.getStart(), 1);
        assertEquals(wholeContig.getStop(), sequence.getSequenceLength());
    }
}
/**
 * Rows of ("chr1", start, stop) where start sweeps from 10 bases before the
 * contig to 10 bases past its end, and stop is start plus 1, 10 or 20.
 */
@DataProvider(name = "GenomeLocOnContig")
public Object[][] makeGenomeLocOnContig() {
    final List<Object[]> rows = new LinkedList<Object[]>();
    final int contigLength = header.getSequence(0).getSequenceLength();
    final int[] spans = { 1, 10, 20 };
    final int lastStartExclusive = contigLength + 10;

    for (int start = -10; start < lastStartExclusive; start++) {
        for (final int span : spans) {
            rows.add(new Object[]{ "chr1", start, start + span });
        }
    }
    return rows.toArray(new Object[][]{});
}
/**
 * createGenomeLocOnContig must return {@code null} when the requested span lies
 * entirely off the contig, and otherwise a location clipped to [1, contig length].
 */
@Test(dataProvider = "GenomeLocOnContig")
public void testGenomeLocOnContig(final String contig, final int start, final int stop) {
    final int contigLength = header.getSequence(0).getSequenceLength();
    final GenomeLoc clipped = genomeLocParser.createGenomeLocOnContig(contig, start, stop);

    final boolean overlapsContig = stop >= 1 && start <= contigLength;
    if (overlapsContig) {
        Assert.assertNotNull(clipped);
        assertEquals(clipped.getContig(), contig);
        assertEquals(clipped.getStart(), Math.max(start, 1));
        assertEquals(clipped.getStop(), Math.min(stop, contigLength));
    } else {
        Assert.assertNull(clipped, "GenomeLoc should be null if the start/stops are not meaningful");
    }
}
/**
 * Rows of (location, padding): every padding from 0 through the contig length,
 * combined with every chr1 location whose start and stop lie in
 * [1, contig length - 1] with start not after stop.
 */
@DataProvider(name = "GenomeLocPadding")
public Object[][] makeGenomeLocPadding() {
    final List<Object[]> rows = new LinkedList<Object[]>();
    final int contigLength = header.getSequence(0).getSequenceLength();

    for (int padding = 0; padding <= contigLength; padding++) {
        for (int from = 1; from < contigLength; from++) {
            for (int to = from; to < contigLength; to++) {
                final GenomeLoc unpadded = genomeLocParser.createGenomeLoc("chr1", from, to);
                rows.add(new Object[]{ unpadded, padding });
            }
        }
    }
    return rows.toArray(new Object[][]{});
}
/**
 * A padded location must stay on the input's contig and extend by the padding
 * on each side, clipped to [1, contig length].
 */
@Test(dataProvider = "GenomeLocPadding")
public void testGenomeLocPadding(final GenomeLoc input, final int pad) {
    final int contigLength = header.getSequence(0).getSequenceLength();
    final int expectedStart = Math.max(input.getStart() - pad, 1);
    final int expectedStop = Math.min(input.getStop() + pad, contigLength);

    final GenomeLoc result = genomeLocParser.createPaddedGenomeLoc(input, pad);

    Assert.assertNotNull(result);
    assertEquals(result.getContig(), input.getContig());
    assertEquals(result.getStart(), expectedStart);
    assertEquals(result.getStop(), expectedStop);
}
}