Added second method for getting large sequences of the reference for use in reads traversals.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@645 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
517f27f331
commit
522f8b58be
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -14,10 +15,49 @@ import net.sf.samtools.util.StringUtil;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReferenceProvider {
|
||||
private IndexedFastaSequenceFile sequenceFile;
|
||||
private Shard shard;
|
||||
|
||||
// Lazy
|
||||
private ReferenceSequence referenceSequence;
|
||||
private GenomeLoc referenceInterval;
|
||||
|
||||
public ReferenceProvider( IndexedFastaSequenceFile sequenceFile, Shard shard ) {
|
||||
this.sequenceFile = sequenceFile;
|
||||
this.shard = shard;
|
||||
}
|
||||
|
||||
public char getReferenceBase( GenomeLoc genomeLoc ) throws InvalidPositionException {
|
||||
if( referenceSequence == null )
|
||||
lazyInitializeLocusAccess();
|
||||
|
||||
validateLocation( genomeLoc );
|
||||
int offset = (int)(genomeLoc.getStart() - referenceInterval.getStart());
|
||||
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the bases of the reference that are aligned to the given read.
|
||||
* @param read the read for which to extract reference information.
|
||||
* @return The bases corresponding to this read, or null if the read is unmapped.
|
||||
*/
|
||||
public char[] getReferenceBases( SAMRecord read ) {
|
||||
if( read.getReadUnmappedFlag() )
|
||||
return null;
|
||||
|
||||
String contig = read.getReferenceName();
|
||||
int start = read.getAlignmentStart();
|
||||
int stop = read.getAlignmentEnd();
|
||||
|
||||
ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop );
|
||||
return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a lazy initialization of access to the locus. Sets up the reference sequence and
|
||||
* limits the user to work only at that sequence.
|
||||
*/
|
||||
private void lazyInitializeLocusAccess() {
|
||||
GenomeLoc position = shard.getGenomeLoc();
|
||||
this.referenceSequence = sequenceFile.getSubsequenceAt( position.getContig(),
|
||||
position.getStart(),
|
||||
|
|
@ -25,12 +65,6 @@ public class ReferenceProvider {
|
|||
this.referenceInterval = position;
|
||||
}
|
||||
|
||||
public char getReferenceBase( GenomeLoc genomeLoc ) throws InvalidPositionException {
|
||||
validateLocation( genomeLoc );
|
||||
int offset = (int)(genomeLoc.getStart() - referenceInterval.getStart());
|
||||
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that the genomeLoc is one base wide and is in the reference sequence.
|
||||
* @param genomeLoc location to verify.
|
||||
|
|
|
|||
Loading…
Reference in New Issue