diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusReferenceViewTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusReferenceViewTest.java new file mode 100755 index 000000000..58c5c92c9 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusReferenceViewTest.java @@ -0,0 +1,81 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.junit.Test; +import org.junit.Assert; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard; +import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; + +import edu.mit.broad.picard.reference.ReferenceSequence; +import net.sf.samtools.util.StringUtil; +/** + * User: hanna + * Date: May 27, 2009 + * Time: 11:10:00 AM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Tests for viewing the reference from the perspective of a locus. + */ + +public class LocusReferenceViewTest extends ReferenceViewTemplate { + /** + * Multiple-base pair queries should generate exceptions. + */ + @Test(expected=InvalidPositionException.class) + public void testSingleBPFailure() { + Shard shard = new LocusShard( new GenomeLoc(0,1,50) ); + + ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null); + LocusReferenceView view = new LocusReferenceView(dataProvider); + + view.getReferenceBase(shard.getGenomeLoc()); + } + + /** + * Queries outside the bounds of the shard should generate an error. + */ + @Test(expected=InvalidPositionException.class) + public void testBoundsFailure() { + Shard shard = new LocusShard( new GenomeLoc(0,1,50) ); + + ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null); + LocusReferenceView view = new LocusReferenceView(dataProvider); + + view.getReferenceBase(new GenomeLoc(0,51)); + } + + + /** + * Compares the contents of the fasta and view at a specified location. + * @param loc + */ + protected void validateLocation( GenomeLoc loc ) { + Shard shard = new LocusShard( loc ); + GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc()); + + ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null); + LocusReferenceView view = new LocusReferenceView(dataProvider); + + while( shardIterator.hasNext() ) { + GenomeLoc locus = shardIterator.next(); + + ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(),locus.getStart(),locus.getStop()); + char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0); + char actual = view.getReferenceBase(locus); + + Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected",locus.toString(),shard.getGenomeLoc()), + expected, + actual); + } + } + +} diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/ReadReferenceViewTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/ReadReferenceViewTest.java new file mode 100755 index 000000000..b01249565 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/ReadReferenceViewTest.java @@ -0,0 +1,74 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.junit.Assert; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.util.StringUtil; +import edu.mit.broad.picard.reference.ReferenceSequence; +/** + * User: hanna + * Date: May 27, 2009 + * Time: 1:04:27 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Test reading the reference for a given read. + */ + +public class ReadReferenceViewTest extends ReferenceViewTemplate { + /** + * Compares the contents of the fasta and view at a specified location. + * @param loc + */ + protected void validateLocation( GenomeLoc loc ) { + SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() ); + + ShardDataProvider dataProvider = new ShardDataProvider(null,null,sequenceFile,null); + ReadReferenceView view = new ReadReferenceView(dataProvider); + + ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop()); + char[] expected = StringUtil.bytesToString(expectedAsSeq.getBases()).toCharArray(); + char[] actual = view.getReferenceBases(read); + + Assert.assertArrayEquals(String.format("Base array at in shard %s does not match expected",loc.toString()), + expected, + actual); + } + + + /** + * Build a SAM record featuring the absolute minimum required dataset. + * TODO: Blatantly copied from LocusViewTemplate. Refactor these into a set of tools. + * @param contig Contig to populate. + * @param alignmentStart start of alignment + * @param alignmentEnd end of alignment + * @return New SAM Record + */ + protected SAMRecord buildSAMRecord( String contig, int alignmentStart, int alignmentEnd ) { + SAMFileHeader header = new SAMFileHeader(); + header.setSequenceDictionary(sequenceFile.getSequenceDictionary()); + + SAMRecord record = new SAMRecord(header); + + record.setReferenceIndex(sequenceFile.getSequenceDictionary().getSequenceIndex(contig)); + record.setAlignmentStart(alignmentStart); + Cigar cigar = new Cigar(); + cigar.add(new CigarElement(alignmentEnd-alignmentStart+1, CigarOperator.M)); + record.setCigar(cigar); + return record; + } + + +} diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceViewTemplate.java new file mode 100755 index 000000000..54c254633 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceViewTemplate.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.BaseTest; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.FileNotFoundException; +import java.io.File; + +import net.sf.samtools.SAMSequenceRecord; +/** + * User: hanna + * Date: May 27, 2009 + * Time: 1:12:35 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Template for testing reference views (ReadReferenceView and LocusReferenceView). + */ + +public abstract class ReferenceViewTemplate extends BaseTest { + /** + * The fasta, for comparison. + */ + protected static IndexedFastaSequenceFile sequenceFile = null; + + // + // The bulk of sequence retrieval is tested by IndexedFastaSequenceFile, but we'll run a few spot + // checks here to make sure that data is flowing through the LocusReferenceView. + + /** + * Initialize the fasta. + */ + @BeforeClass + public static void initialize() throws FileNotFoundException { + sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") ); + GenomeLoc.setupRefContigOrdering(sequenceFile); + } + + /** + * Test the initial fasta location. + */ + @Test + public void testReferenceStart() { + validateLocation( new GenomeLoc(0,1,25) ); + } + + /** + * Test the end of a contig. + */ + @Test + public void testReferenceEnd() { + // Test the last 25 bases of the first contig. + SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1); + final long contigStart = selectedContig.getSequenceLength() - 24; + final long contigStop = selectedContig.getSequenceLength(); + validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + } + + /** + * Test the start of the middle contig. + */ + @Test + public void testContigStart() { + // Test the last 25 bases of the first contig. + int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; + SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); + validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),1,25) ); + } + + + /** + * Test the end of the middle contig. + */ + @Test + public void testContigEnd() { + // Test the last 25 bases of the first contig. + int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; + SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); + final long contigStart = selectedContig.getSequenceLength() - 24; + final long contigStop = selectedContig.getSequenceLength(); + validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + } + + protected abstract void validateLocation( GenomeLoc loc ); +}