When the reference bases are requested for a read that maps partially off the end of a contig, the returned string is now padded with 'X' characters for the base positions that lie beyond the contig's end and therefore have no corresponding reference base. A test case is included.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1156 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
47cb9f169e
commit
bc17ff567a
|
|
@ -4,17 +4,29 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
/**
|
/*
|
||||||
* User: hanna
|
* Copyright (c) 2009 The Broad Institute
|
||||||
* Date: May 22, 2009
|
|
||||||
* Time: 12:24:23 PM
|
|
||||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
||||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
||||||
* All rights are reserved.
|
|
||||||
*
|
*
|
||||||
* Users acknowledge that this software is supplied without any warranty or support.
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* The Broad Institute is not responsible for its use, misuse, or
|
* obtaining a copy of this software and associated documentation
|
||||||
* functionality.
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -65,10 +77,15 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
*/
|
*/
|
||||||
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||||
long stop = genomeLoc.getStop();
|
long stop = genomeLoc.getStop();
|
||||||
if (reference.getSequence(genomeLoc.getContig()).length() > genomeLoc.getStart()) {
|
long other = reference.getSequence(genomeLoc.getContig()).length();
|
||||||
stop = reference.getSequence(genomeLoc.getContig()).length();
|
if (other < genomeLoc.getStop()) {
|
||||||
|
stop = other;
|
||||||
}
|
}
|
||||||
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
|
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
|
||||||
|
if (genomeLoc.getStop() - stop < 0) {
|
||||||
|
int y = 0;
|
||||||
|
y++;
|
||||||
|
}
|
||||||
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
|
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,76 +4,100 @@ import org.junit.Test;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
||||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
/**
|
/*
|
||||||
* User: hanna
|
* Copyright (c) 2009 The Broad Institute
|
||||||
* Date: May 27, 2009
|
|
||||||
* Time: 11:10:00 AM
|
|
||||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
||||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
||||||
* All rights are reserved.
|
|
||||||
*
|
*
|
||||||
* Users acknowledge that this software is supplied without any warranty or support.
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* The Broad Institute is not responsible for its use, misuse, or
|
* obtaining a copy of this software and associated documentation
|
||||||
* functionality.
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/** Tests for viewing the reference from the perspective of a locus. */
|
||||||
* Tests for viewing the reference from the perspective of a locus.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
||||||
/**
|
/** Multiple-base pair queries should generate exceptions. */
|
||||||
* Multiple-base pair queries should generate exceptions.
|
@Test(expected = InvalidPositionException.class)
|
||||||
*/
|
|
||||||
@Test(expected=InvalidPositionException.class)
|
|
||||||
public void testSingleBPFailure() {
|
public void testSingleBPFailure() {
|
||||||
Shard shard = new LocusShard( GenomeLocParser.createGenomeLoc(0,1,50) );
|
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, 1, 50));
|
||||||
|
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||||
|
|
||||||
view.getReferenceBase(shard.getGenomeLoc());
|
view.getReferenceBase(shard.getGenomeLoc());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
@Test
|
||||||
* Queries outside the bounds of the shard should generate an error.
|
public void testOverlappingReferenceBases() {
|
||||||
*/
|
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length()));
|
||||||
@Test(expected=InvalidPositionException.class)
|
|
||||||
public void testBoundsFailure() {
|
|
||||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0,1,50) );
|
|
||||||
|
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||||
|
|
||||||
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0,51));
|
char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9));
|
||||||
|
Assert.assertEquals(20, results.length);
|
||||||
|
for (int x = 0; x < results.length; x++) {
|
||||||
|
if (x <= 10) Assert.assertTrue(results[x] != 'X');
|
||||||
|
else Assert.assertTrue(results[x] == 'X');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Queries outside the bounds of the shard should generate an error. */
|
||||||
|
@Test(expected = InvalidPositionException.class)
|
||||||
|
public void testBoundsFailure() {
|
||||||
|
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, 1, 50));
|
||||||
|
|
||||||
|
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||||
|
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||||
|
|
||||||
|
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0, 51));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compares the contents of the fasta and view at a specified location.
|
* Compares the contents of the fasta and view at a specified location.
|
||||||
|
*
|
||||||
* @param loc
|
* @param loc
|
||||||
*/
|
*/
|
||||||
protected void validateLocation( GenomeLoc loc ) {
|
protected void validateLocation( GenomeLoc loc ) {
|
||||||
Shard shard = new LocusShard( loc );
|
Shard shard = new LocusShard(loc);
|
||||||
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
|
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
|
||||||
|
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||||
|
|
||||||
while( shardIterator.hasNext() ) {
|
while (shardIterator.hasNext()) {
|
||||||
GenomeLoc locus = shardIterator.next();
|
GenomeLoc locus = shardIterator.next();
|
||||||
|
|
||||||
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(),locus.getStart(),locus.getStop());
|
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(), locus.getStart(), locus.getStop());
|
||||||
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
|
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
|
||||||
char actual = view.getReferenceBase(locus);
|
char actual = view.getReferenceBase(locus);
|
||||||
|
|
||||||
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected",locus.toString(),shard.getGenomeLoc()),
|
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLoc()),
|
||||||
expected,
|
expected,
|
||||||
actual);
|
actual);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue