Unit tests for the reference views. Partially addresses GSA-25.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@833 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-05-27 17:49:45 +00:00
parent 9bd6489f8e
commit 8edba13ded
3 changed files with 250 additions and 0 deletions

View File

@ -0,0 +1,81 @@
package org.broadinstitute.sting.gatk.dataSources.providers;
import org.junit.Test;
import org.junit.Assert;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import edu.mit.broad.picard.reference.ReferenceSequence;
import net.sf.samtools.util.StringUtil;
/**
* User: hanna
* Date: May 27, 2009
* Time: 11:10:00 AM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* Tests for viewing the reference from the perspective of a locus.
*/
public class LocusReferenceViewTest extends ReferenceViewTemplate {
/**
* Multiple-base pair queries should generate exceptions.
*/
@Test(expected=InvalidPositionException.class)
public void testSingleBPFailure() {
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
view.getReferenceBase(shard.getGenomeLoc());
}
/**
* Queries outside the bounds of the shard should generate an error.
*/
@Test(expected=InvalidPositionException.class)
public void testBoundsFailure() {
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
view.getReferenceBase(new GenomeLoc(0,51));
}
/**
* Compares the contents of the fasta and view at a specified location.
* @param loc
*/
protected void validateLocation( GenomeLoc loc ) {
Shard shard = new LocusShard( loc );
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
while( shardIterator.hasNext() ) {
GenomeLoc locus = shardIterator.next();
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(),locus.getStart(),locus.getStop());
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
char actual = view.getReferenceBase(locus);
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected",locus.toString(),shard.getGenomeLoc()),
expected,
actual);
}
}
}

View File

@ -0,0 +1,74 @@
package org.broadinstitute.sting.gatk.dataSources.providers;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.junit.Assert;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.util.StringUtil;
import edu.mit.broad.picard.reference.ReferenceSequence;
/**
* User: hanna
* Date: May 27, 2009
* Time: 1:04:27 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* Test reading the reference for a given read.
*/
public class ReadReferenceViewTest extends ReferenceViewTemplate {
/**
* Compares the contents of the fasta and view at a specified location.
* @param loc
*/
protected void validateLocation( GenomeLoc loc ) {
SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() );
ShardDataProvider dataProvider = new ShardDataProvider(null,null,sequenceFile,null);
ReadReferenceView view = new ReadReferenceView(dataProvider);
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop());
char[] expected = StringUtil.bytesToString(expectedAsSeq.getBases()).toCharArray();
char[] actual = view.getReferenceBases(read);
Assert.assertArrayEquals(String.format("Base array at in shard %s does not match expected",loc.toString()),
expected,
actual);
}
/**
* Build a SAM record featuring the absolute minimum required dataset.
* TODO: Blatantly copied from LocusViewTemplate. Refactor these into a set of tools.
* @param contig Contig to populate.
* @param alignmentStart start of alignment
* @param alignmentEnd end of alignment
* @return New SAM Record
*/
protected SAMRecord buildSAMRecord( String contig, int alignmentStart, int alignmentEnd ) {
SAMFileHeader header = new SAMFileHeader();
header.setSequenceDictionary(sequenceFile.getSequenceDictionary());
SAMRecord record = new SAMRecord(header);
record.setReferenceIndex(sequenceFile.getSequenceDictionary().getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart);
Cigar cigar = new Cigar();
cigar.add(new CigarElement(alignmentEnd-alignmentStart+1, CigarOperator.M));
record.setCigar(cigar);
return record;
}
}

View File

@ -0,0 +1,95 @@
package org.broadinstitute.sting.gatk.dataSources.providers;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.BaseTest;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.io.File;
import net.sf.samtools.SAMSequenceRecord;
/**
* User: hanna
* Date: May 27, 2009
* Time: 1:12:35 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* Template for testing reference views (ReadReferenceView and LocusReferenceView).
*/
public abstract class ReferenceViewTemplate extends BaseTest {
/**
* The fasta, for comparison.
*/
protected static IndexedFastaSequenceFile sequenceFile = null;
//
// The bulk of sequence retrieval is tested by IndexedFastaSequenceFile, but we'll run a few spot
// checks here to make sure that data is flowing through the LocusReferenceView.
/**
* Initialize the fasta.
*/
@BeforeClass
public static void initialize() throws FileNotFoundException {
sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
GenomeLoc.setupRefContigOrdering(sequenceFile);
}
/**
* Test the initial fasta location.
*/
@Test
public void testReferenceStart() {
validateLocation( new GenomeLoc(0,1,25) );
}
/**
* Test the end of a contig.
*/
@Test
public void testReferenceEnd() {
// Test the last 25 bases of the first contig.
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1);
final long contigStart = selectedContig.getSequenceLength() - 24;
final long contigStop = selectedContig.getSequenceLength();
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
}
/**
* Test the start of the middle contig.
*/
@Test
public void testContigStart() {
// Test the last 25 bases of the first contig.
int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2;
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),1,25) );
}
/**
* Test the end of the middle contig.
*/
@Test
public void testContigEnd() {
// Test the last 25 bases of the first contig.
int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2;
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
final long contigStart = selectedContig.getSequenceLength() - 24;
final long contigStop = selectedContig.getSequenceLength();
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
}
protected abstract void validateLocation( GenomeLoc loc );
}