Unit tests for the reference views. Partially addresses GSA-25.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@833 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9bd6489f8e
commit
8edba13ded
|
|
@ -0,0 +1,81 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 27, 2009
|
||||
* Time: 11:10:00 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Tests for viewing the reference from the perspective of a locus.
|
||||
*/
|
||||
|
||||
public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
||||
/**
|
||||
* Multiple-base pair queries should generate exceptions.
|
||||
*/
|
||||
@Test(expected=InvalidPositionException.class)
|
||||
public void testSingleBPFailure() {
|
||||
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(shard.getGenomeLoc());
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries outside the bounds of the shard should generate an error.
|
||||
*/
|
||||
@Test(expected=InvalidPositionException.class)
|
||||
public void testBoundsFailure() {
|
||||
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(new GenomeLoc(0,51));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compares the contents of the fasta and view at a specified location.
|
||||
* @param loc
|
||||
*/
|
||||
protected void validateLocation( GenomeLoc loc ) {
|
||||
Shard shard = new LocusShard( loc );
|
||||
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
while( shardIterator.hasNext() ) {
|
||||
GenomeLoc locus = shardIterator.next();
|
||||
|
||||
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(),locus.getStart(),locus.getStop());
|
||||
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
|
||||
char actual = view.getReferenceBase(locus);
|
||||
|
||||
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected",locus.toString(),shard.getGenomeLoc()),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.junit.Assert;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 27, 2009
|
||||
* Time: 1:04:27 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test reading the reference for a given read.
|
||||
*/
|
||||
|
||||
public class ReadReferenceViewTest extends ReferenceViewTemplate {
|
||||
/**
|
||||
* Compares the contents of the fasta and view at a specified location.
|
||||
* @param loc
|
||||
*/
|
||||
protected void validateLocation( GenomeLoc loc ) {
|
||||
SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() );
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(null,null,sequenceFile,null);
|
||||
ReadReferenceView view = new ReadReferenceView(dataProvider);
|
||||
|
||||
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
char[] expected = StringUtil.bytesToString(expectedAsSeq.getBases()).toCharArray();
|
||||
char[] actual = view.getReferenceBases(read);
|
||||
|
||||
Assert.assertArrayEquals(String.format("Base array at in shard %s does not match expected",loc.toString()),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Build a SAM record featuring the absolute minimum required dataset.
|
||||
* TODO: Blatantly copied from LocusViewTemplate. Refactor these into a set of tools.
|
||||
* @param contig Contig to populate.
|
||||
* @param alignmentStart start of alignment
|
||||
* @param alignmentEnd end of alignment
|
||||
* @return New SAM Record
|
||||
*/
|
||||
protected SAMRecord buildSAMRecord( String contig, int alignmentStart, int alignmentEnd ) {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
header.setSequenceDictionary(sequenceFile.getSequenceDictionary());
|
||||
|
||||
SAMRecord record = new SAMRecord(header);
|
||||
|
||||
record.setReferenceIndex(sequenceFile.getSequenceDictionary().getSequenceIndex(contig));
|
||||
record.setAlignmentStart(alignmentStart);
|
||||
Cigar cigar = new Cigar();
|
||||
cigar.add(new CigarElement(alignmentEnd-alignmentStart+1, CigarOperator.M));
|
||||
record.setCigar(cigar);
|
||||
return record;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 27, 2009
|
||||
* Time: 1:12:35 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Template for testing reference views (ReadReferenceView and LocusReferenceView).
|
||||
*/
|
||||
|
||||
public abstract class ReferenceViewTemplate extends BaseTest {
|
||||
/**
|
||||
* The fasta, for comparison.
|
||||
*/
|
||||
protected static IndexedFastaSequenceFile sequenceFile = null;
|
||||
|
||||
//
|
||||
// The bulk of sequence retrieval is tested by IndexedFastaSequenceFile, but we'll run a few spot
|
||||
// checks here to make sure that data is flowing through the LocusReferenceView.
|
||||
|
||||
/**
|
||||
* Initialize the fasta.
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void initialize() throws FileNotFoundException {
|
||||
sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
|
||||
GenomeLoc.setupRefContigOrdering(sequenceFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the initial fasta location.
|
||||
*/
|
||||
@Test
|
||||
public void testReferenceStart() {
|
||||
validateLocation( new GenomeLoc(0,1,25) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the end of a contig.
|
||||
*/
|
||||
@Test
|
||||
public void testReferenceEnd() {
|
||||
// Test the last 25 bases of the first contig.
|
||||
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1);
|
||||
final long contigStart = selectedContig.getSequenceLength() - 24;
|
||||
final long contigStop = selectedContig.getSequenceLength();
|
||||
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the start of the middle contig.
|
||||
*/
|
||||
@Test
|
||||
public void testContigStart() {
|
||||
// Test the last 25 bases of the first contig.
|
||||
int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2;
|
||||
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
|
||||
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),1,25) );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test the end of the middle contig.
|
||||
*/
|
||||
@Test
|
||||
public void testContigEnd() {
|
||||
// Test the last 25 bases of the first contig.
|
||||
int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2;
|
||||
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
|
||||
final long contigStart = selectedContig.getSequenceLength() - 24;
|
||||
final long contigStop = selectedContig.getSequenceLength();
|
||||
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
|
||||
}
|
||||
|
||||
protected abstract void validateLocation( GenomeLoc loc );
|
||||
}
|
||||
Loading…
Reference in New Issue