When you get the reference string for a read that is mapped partially off the end of a contig, the string is masked with X's for base positions without corresponding reference positions. Now with a test case!

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1156 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-02 14:15:50 +00:00
parent 47cb9f169e
commit bc17ff567a
2 changed files with 85 additions and 44 deletions

View File

@ -4,17 +4,29 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.util.StringUtil;
/**
* User: hanna
* Date: May 22, 2009
* Time: 12:24:23 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
/*
* Copyright (c) 2009 The Broad Institute
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
@ -65,10 +77,15 @@ public class LocusReferenceView extends ReferenceView {
*/
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
long stop = genomeLoc.getStop();
if (reference.getSequence(genomeLoc.getContig()).length() > genomeLoc.getStart()) {
stop = reference.getSequence(genomeLoc.getContig()).length();
long other = reference.getSequence(genomeLoc.getContig()).length();
if (other < genomeLoc.getStop()) {
stop = other;
}
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
if (genomeLoc.getStop() - stop < 0) {
int y = 0;
y++;
}
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
}

View File

@ -4,76 +4,100 @@ import org.junit.Test;
import org.junit.Assert;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.util.StringUtil;
/**
* User: hanna
* Date: May 27, 2009
* Time: 11:10:00 AM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
/*
* Copyright (c) 2009 The Broad Institute
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Tests for viewing the reference from the perspective of a locus.
*/
/** Tests for viewing the reference from the perspective of a locus. */
public class LocusReferenceViewTest extends ReferenceViewTemplate {
/** Multiple-base pair queries should generate exceptions. */
@Test(expected = InvalidPositionException.class)
public void testSingleBPFailure() {
    Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, 1, 50));
    ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
    LocusReferenceView view = new LocusReferenceView(dataProvider);

    // The shard's own location is passed to the single-base accessor; per the
    // expected-exception annotation this must be rejected.
    view.getReferenceBase(shard.getGenomeLoc());
}
/**
* Queries outside the bounds of the shard should generate an error.
*/
@Test(expected=InvalidPositionException.class)
public void testBoundsFailure() {
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0,1,50) );
@Test
public void testOverlappingReferenceBases() {
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length()));
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0,51));
char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9));
Assert.assertEquals(20, results.length);
for (int x = 0; x < results.length; x++) {
if (x <= 10) Assert.assertTrue(results[x] != 'X');
else Assert.assertTrue(results[x] == 'X');
}
}
/** A single-base lookup at a position the shard does not cover must raise an error. */
@Test(expected = InvalidPositionException.class)
public void testBoundsFailure() {
    GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc(0, 1, 50);
    Shard shard = new LocusShard(shardBounds);
    LocusReferenceView view = new LocusReferenceView(new ShardDataProvider(shard, null, sequenceFile, null));

    // Position 51 on the same contig is not part of the shard created above.
    GenomeLoc outsideShard = GenomeLocParser.createGenomeLoc(0, 51);
    view.getReferenceBase(outsideShard);
}
/**
* Compares the contents of the fasta and view at a specified location.
*
* @param loc
*/
protected void validateLocation( GenomeLoc loc ) {
Shard shard = new LocusShard( loc );
Shard shard = new LocusShard(loc);
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
while( shardIterator.hasNext() ) {
while (shardIterator.hasNext()) {
GenomeLoc locus = shardIterator.next();
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(),locus.getStart(),locus.getStop());
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(), locus.getStart(), locus.getStop());
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
char actual = view.getReferenceBase(locus);
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected",locus.toString(),shard.getGenomeLoc()),
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLoc()),
expected,
actual);
}