When you get the reference string for a read that is mapped partially off the end of a contig, the string is masked with X's for base positions without corresponding reference positions. Now with a test case!
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1156 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
47cb9f169e
commit
bc17ff567a
|
|
@ -4,17 +4,29 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 22, 2009
|
||||
* Time: 12:24:23 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
@ -65,10 +77,15 @@ public class LocusReferenceView extends ReferenceView {
|
|||
*/
|
||||
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||
long stop = genomeLoc.getStop();
|
||||
if (reference.getSequence(genomeLoc.getContig()).length() > genomeLoc.getStart()) {
|
||||
stop = reference.getSequence(genomeLoc.getContig()).length();
|
||||
long other = reference.getSequence(genomeLoc.getContig()).length();
|
||||
if (other < genomeLoc.getStop()) {
|
||||
stop = other;
|
||||
}
|
||||
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
|
||||
if (genomeLoc.getStop() - stop < 0) {
|
||||
int y = 0;
|
||||
y++;
|
||||
}
|
||||
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,76 +4,100 @@ import org.junit.Test;
|
|||
import org.junit.Assert;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 27, 2009
|
||||
* Time: 11:10:00 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Tests for viewing the reference from the perspective of a locus.
|
||||
*/
|
||||
/** Tests for viewing the reference from the perspective of a locus. */
|
||||
|
||||
public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
||||
/**
|
||||
* Multiple-base pair queries should generate exceptions.
|
||||
*/
|
||||
@Test(expected=InvalidPositionException.class)
|
||||
/** Multiple-base pair queries should generate exceptions. */
|
||||
@Test(expected = InvalidPositionException.class)
|
||||
public void testSingleBPFailure() {
|
||||
Shard shard = new LocusShard( GenomeLocParser.createGenomeLoc(0,1,50) );
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, 1, 50));
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(shard.getGenomeLoc());
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries outside the bounds of the shard should generate an error.
|
||||
*/
|
||||
@Test(expected=InvalidPositionException.class)
|
||||
public void testBoundsFailure() {
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0,1,50) );
|
||||
@Test
|
||||
public void testOverlappingReferenceBases() {
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length()));
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0,51));
|
||||
char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9));
|
||||
Assert.assertEquals(20, results.length);
|
||||
for (int x = 0; x < results.length; x++) {
|
||||
if (x <= 10) Assert.assertTrue(results[x] != 'X');
|
||||
else Assert.assertTrue(results[x] == 'X');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Queries outside the bounds of the shard should generate an error. */
|
||||
@Test(expected = InvalidPositionException.class)
|
||||
public void testBoundsFailure() {
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0, 1, 50));
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0, 51));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compares the contents of the fasta and view at a specified location.
|
||||
*
|
||||
* @param loc the location whose bases are compared, locus by locus, between the fasta and the view
|
||||
*/
|
||||
protected void validateLocation( GenomeLoc loc ) {
|
||||
Shard shard = new LocusShard( loc );
|
||||
Shard shard = new LocusShard(loc);
|
||||
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
while( shardIterator.hasNext() ) {
|
||||
while (shardIterator.hasNext()) {
|
||||
GenomeLoc locus = shardIterator.next();
|
||||
|
||||
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(),locus.getStart(),locus.getStop());
|
||||
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(), locus.getStart(), locus.getStop());
|
||||
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
|
||||
char actual = view.getReferenceBase(locus);
|
||||
|
||||
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected",locus.toString(),shard.getGenomeLoc()),
|
||||
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLoc()),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue