Matt make a good point that the Reference Iterator we were using wasn't bounded; The BoundedReferenceIterator takes a GenomeLoc to bound the iterations by

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@305 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-04-06 23:03:56 +00:00
parent 5a5c6d1276
commit 4aa9c0d591
2 changed files with 119 additions and 9 deletions

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
import org.broadinstitute.sting.gatk.iterators.BoundedReferenceIterator;
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -44,17 +45,11 @@ public class ReferenceDataSource implements SimpleDataSource {
* @param location the genome location to extract data for
* @return an iterator of the appropriate type, that is limited by the region
*/
public ReferenceIterator seek(GenomeLoc location) {
ReferenceIterator refSite = refIter.seekForward(location);
return refSite;
public BoundedReferenceIterator seek(GenomeLoc location) {
BoundedReferenceIterator ret = new BoundedReferenceIterator(refIter.seekForward(location), location);
return ret;
}
/**
* Constructor - ReferenceDataSource
*
* @param refFileName the reference file
* @throws SimpleDataSourceLoadException
*/
public ReferenceDataSource(String refFileName) throws SimpleDataSourceLoadException {
if (refFileName == null) {
throw new SimpleDataSourceLoadException("ReferenceDataSource: refFileName passed in is null");
@ -63,6 +58,7 @@ public class ReferenceDataSource implements SimpleDataSource {
if (!infile.canRead()) {
throw new SimpleDataSourceLoadException("ReferenceDataSource: Unable to load file: " + refFileName);
}
//this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName);
refFile = new FastaSequenceFile2(new File(refFileName));
refIter = new ReferenceIterator(this.refFile);

View File

@ -0,0 +1,114 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
/**
*
* User: aaron
* Date: Apr 2, 2009
* Time: 2:12:12 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 2, 2009
* <p/>
* Class EnclosedIterator
* <p/>
* This class is a decorator class from Reference Iterator (though it is constrained
* by the fact that referenceIterator.seekForwardOffset explicitly returns a referenceIterator
* for now
* <p/>
* TODO: Fix the underlying iterator and this class to model a real decorator pattern
*/
public class BoundedReferenceIterator implements Iterator<ReferenceIterator> {
// the location to screen over
private final GenomeLoc mLoc;
private final ReferenceIterator referenceIterator;
/**
* Default constructor
*
* @param referenceIterator
* @param loc
*/
public BoundedReferenceIterator(ReferenceIterator referenceIterator, GenomeLoc loc) {
this.referenceIterator = referenceIterator;
this.mLoc = loc;
}
/**
* isSubRegion
* <p/>
* returns true if we include the whole passed in region
*
* @param loc the genome region to check
* @return true if we include THE WHOLE specified region
*/
protected boolean isSubRegion(GenomeLoc loc) {
// if the location is null, we assume we're all inclusive (we represent the whole genome).
if (mLoc == null || loc.isBetween(mLoc, mLoc)) {
return true;
}
return false;
}
/**
* returns true if we include the whole passed in region
*
* @param contig
* @param start
* @param stop
* @return true if we enclose the passed region, false otherwise
*/
protected boolean isSubRegion(final String contig, final int start, final int stop) {
final GenomeLoc lc = new GenomeLoc(contig, start, stop);
return isSubRegion(lc);
}
/**
* If we're less then the limiting genomeLoc
*
* @param loc
* @return
*/
protected boolean isLessThan(GenomeLoc loc) {
return loc.isPast(mLoc);
}
// our adapted next function
public boolean hasNext() {
// first check that we are within the search place
GenomeLoc loc = referenceIterator.getLocation();
if (!isSubRegion(loc)) {
return false;
}
return referenceIterator.hasNext();
}
public ReferenceIterator next() {
return referenceIterator.next();
}
public void remove() {
referenceIterator.remove();
}
}