Added second method for getting large sequences of the reference for use in reads traversals.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@645 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-05-08 22:18:04 +00:00
parent 517f27f331
commit 522f8b58be
1 changed files with 40 additions and 6 deletions

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import edu.mit.broad.picard.reference.ReferenceSequence;
import net.sf.samtools.util.StringUtil;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
@ -14,10 +15,49 @@ import net.sf.samtools.util.StringUtil;
* To change this template use File | Settings | File Templates.
*/
public class ReferenceProvider {
private IndexedFastaSequenceFile sequenceFile;
private Shard shard;
// Lazy
private ReferenceSequence referenceSequence;
private GenomeLoc referenceInterval;
public ReferenceProvider( IndexedFastaSequenceFile sequenceFile, Shard shard ) {
this.sequenceFile = sequenceFile;
this.shard = shard;
}
public char getReferenceBase( GenomeLoc genomeLoc ) throws InvalidPositionException {
if( referenceSequence == null )
lazyInitializeLocusAccess();
validateLocation( genomeLoc );
int offset = (int)(genomeLoc.getStart() - referenceInterval.getStart());
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
}
/**
* Gets the bases of the reference that are aligned to the given read.
* @param read the read for which to extract reference information.
* @return The bases corresponding to this read, or null if the read is unmapped.
*/
public char[] getReferenceBases( SAMRecord read ) {
if( read.getReadUnmappedFlag() )
return null;
String contig = read.getReferenceName();
int start = read.getAlignmentStart();
int stop = read.getAlignmentEnd();
ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop );
return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray();
}
/**
* Perform a lazy initialization of access to the locus. Sets up the reference sequence and
* limits the user to work only at that sequence.
*/
private void lazyInitializeLocusAccess() {
GenomeLoc position = shard.getGenomeLoc();
this.referenceSequence = sequenceFile.getSubsequenceAt( position.getContig(),
position.getStart(),
@ -25,12 +65,6 @@ public class ReferenceProvider {
this.referenceInterval = position;
}
public char getReferenceBase( GenomeLoc genomeLoc ) throws InvalidPositionException {
validateLocation( genomeLoc );
int offset = (int)(genomeLoc.getStart() - referenceInterval.getStart());
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
}
/**
* Validates that the genomeLoc is one base wide and is in the reference sequence.
* @param genomeLoc location to verify.