diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/DataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/DataSource.java new file mode 100644 index 000000000..2ddcea799 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/DataSource.java @@ -0,0 +1,21 @@ +package org.broadinstitute.sting.gatk.dataSources; + +import org.broadinstitute.sting.gatk.dataSources.chunks.DataShard; + +/** + * User: aaron + * Date: Mar 25, 2009 + * Time: 6:20:00 PM + *

+ * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+public interface DataSource {
+
+    /**
+     * Hands back a shard for this data source.
+     *
+     * @param chunkCount chunking parameter supplied by the caller
+     *                   (NOTE(review): whether this is a count or a size is not specified here -- confirm)
+     * @return a {@link DataShard}
+     */
+    DataShard toChunk(int chunkCount);
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/DataSourceBuilder.java b/java/src/org/broadinstitute/sting/gatk/dataSources/DataSourceBuilder.java
new file mode 100644
index 000000000..ecbf23584
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/DataSourceBuilder.java
@@ -0,0 +1,74 @@
+package org.broadinstitute.sting.gatk.dataSources;
+
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+
+import java.util.ArrayList;
+import java.io.File;
+
+/**
+ * User: aaron
+ * Date: Mar 25, 2009
+ * Time: 4:51:39 PM
+ *

+ * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+public class DataSourceBuilder {
+
+    // files handed to the builder so far, in the order they were added
+    ArrayList<File> passFiles = new ArrayList<File>();
+
+    public DataSourceBuilder() {
+    }
+
+    /**
+     * Adds a file, given by name, to the files used to generate the data sources.
+     *
+     * @param fileName the filename that should be used; it is wrapped in a {@link File}
+     */
+    public void addDataFile(String fileName) {
+        // for now, just add it to the internal file list
+        passFiles.add(new File(fileName));
+    }
+
+    /**
+     * Adds a file to the files used to generate the data sources.
+     *
+     * @param file the file that should be used; it is stored as-is, no existence check is made here
+     */
+    public void addDataFile(File file) {
+        // for now, just add it to the internal file list
+        passFiles.add(file);
+    }
+
+    /**
+     * Builds the data source for the given walker.
+     * Not implemented yet: the walker type is inspected, but every call currently returns null.
+     *
+     * @param inputWalker the walker the data source is intended for
+     * @return always null in this version
+     */
+    public DataSource build(Walker inputWalker) {
+        if (inputWalker instanceof ReadWalker) {
+            // TODO: create the read data source here (a private generateReadDataSource() helper is planned)
+        }
+
+        return null;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/DataSourceGenerationException.java b/java/src/org/broadinstitute/sting/gatk/dataSources/DataSourceGenerationException.java
new file mode 100644
index 000000000..711335c41
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/DataSourceGenerationException.java
@@ -0,0 +1,24 @@
+package org.broadinstitute.sting.gatk.dataSources;
+
+/**
+ * User: aaron
+ * Date: Mar 26, 2009
+ * Time: 9:25:49 AM
+ *

+ * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+
+/**
+ * This exception is thrown when we're unable to generate a data source,
+ * most likely due to an incomplete input source list
+ */
+public class DataSourceGenerationException extends Exception {
+
+    /** Creates the exception with no detail message. */
+    public DataSourceGenerationException() {
+        super();
+    }
+
+    /**
+     * @param message description of why the data source could not be generated
+     */
+    public DataSourceGenerationException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message description of why the data source could not be generated
+     * @param cause   the underlying failure, kept so its stack trace is not lost
+     */
+    public DataSourceGenerationException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/ReadDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/ReadDataSource.java
new file mode 100644
index 000000000..2f463a958
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/ReadDataSource.java
@@ -0,0 +1,48 @@
+package org.broadinstitute.sting.gatk.dataSources;
+
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+
+import java.util.Iterator;
+import java.io.File;
+
+import org.broadinstitute.sting.gatk.iterators.VerifyingSamIterator;
+import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
+import org.broadinstitute.sting.utils.FastaSequenceFile2;
+
+/**
+ * User: aaron
+ * Date: Mar 26, 2009
+ * Time: 10:35:40 AM
+ *

+ * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+public class ReadDataSource {
+
+    // ---- our SAM data files ----
+
+    // our SAM reader
+    private SAMFileReader samReader = null;
+    // iterator over the sam records in the readsFile
+    private Iterator<SAMRecord> samReadIter = null;
+
+    // The verifying iterator, it does checking
+    VerifyingSamIterator verifyingSamReadIter = null;
+
+    // ---- our reference data source: filename, refSeqFile, and iterator ----
+
+    // the name of the reference file
+    private File refFileName = null;
+    // todo: merge FastaSequenceFile2 into picard! (the field is meant to become a ReferenceSequenceFile)
+    private FastaSequenceFile2 refFile = null;
+    // iterator over the reference
+    private ReferenceIterator refIter = null;
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/BasicDataShard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/BasicDataShard.java
new file mode 100644
index 000000000..76f111674
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/BasicDataShard.java
@@ -0,0 +1,36 @@
+package org.broadinstitute.sting.gatk.dataSources.chunks;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: aaronmckenna
+ * Date: Mar 29, 2009
+ * Time: 8:35:16 PM
+ * To change this template use File | Settings | File Templates.
+ */
+
+/**
+ * A shard backed by an in-memory list; it hands out the list's elements in order.
+ *
+ * DataShard is declared without a type parameter in this patch, so it is implemented
+ * in its raw form here; T is the element type of the backing list.
+ *
+ * @param <T> the type of element stored in the shard
+ */
+public class BasicDataShard<T> implements DataShard {
+
+    // the elements served by this shard (the caller's list is used directly, not copied)
+    List<T> list = new ArrayList<T>();
+    // position of the element the next call to next() will return
+    int index = 0;
+    // true only between a call to next() and the following remove()
+    private boolean canRemove = false;
+
+    /**
+     * @param list the elements this shard iterates over
+     */
+    public BasicDataShard(List<T> list) {
+        this.list = list;
+    }
+
+    /**
+     * @return true while the cursor has not passed the end of the list
+     */
+    public boolean hasNext() {
+        return index < list.size();
+    }
+
+    /**
+     * Returns the element under the cursor and advances past it.
+     *
+     * @return the next element of the list
+     * @throws java.util.NoSuchElementException if every element has already been returned
+     */
+    public T next() {
+        if (!hasNext()) {
+            throw new java.util.NoSuchElementException("no element at index " + index);
+        }
+        canRemove = true;
+        return list.get(index++);
+    }
+
+    /**
+     * Removes the element most recently returned by next() from the backing list.
+     *
+     * @throws IllegalStateException if next() has not been called since the last remove()
+     */
+    public void remove() {
+        if (!canRemove) {
+            throw new IllegalStateException("remove() requires a preceding call to next()");
+        }
+        // the element last returned sits just before the cursor; step back onto it
+        list.remove(--index);
+        canRemove = false;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/DataShard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/DataShard.java
new file mode 100644
index 000000000..d40f9b6f3
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/DataShard.java
@@ -0,0 +1,19 @@
+package org.broadinstitute.sting.gatk.dataSources.chunks;
+
+import java.util.Iterator;
+
+/**
+ * User: aaron
+ * Date: Mar 26, 2009
+ * Time: 2:43:04 PM
+ *

+ * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+
+/**
+ * A shard of data, consumed through the standard iterator protocol.
+ * Implementations that name this interface without a type argument keep compiling (raw use).
+ *
+ * @param <T> the type of element handed out by next()
+ */
+public interface DataShard<T> extends Iterator<T> {
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/LociShard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/LociShard.java
new file mode 100644
index 000000000..7dc6913de
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/LociShard.java
@@ -0,0 +1,99 @@
+package org.broadinstitute.sting.gatk.dataSources.chunks;
+
+import org.broadinstitute.sting.gatk.LocusContext;
+import org.broadinstitute.sting.gatk.dataSources.datum.LocusDatum;
+import org.broadinstitute.sting.gatk.iterators.LocusIterator;
+import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ *
+ * User: aaron
+ * Date: Mar 30, 2009
+ * Time: 7:01:56 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date Mar 30, 2009
+ *

+ * Class LociShard + *

+ * This is the loci shard, which are collectively made when a shatter call is made to
+ * a data source.
+ */
+public class LociShard implements DataShard {
+
+    // our locusIterator
+    private final LocusIterator locusIterator;
+
+    // our reference locusIterator
+    private final ReferenceIterator refIterator;
+
+    // Iterators over the rods, one per reference ordered data source
+    private final List<ReferenceOrderedData.RODIterator> rodIters;
+
+    // the max number of loci this shard hands out
+    private final int maxCount;
+
+    // how many loci we've handed out so far
+    private int iterCount = 0;
+
+    /**
+     * @param locusIterator iterator supplying the locus contexts
+     * @param refIterator   iterator over the reference, sought forward to each locus
+     * @param rodIters      iterators over the reference ordered data
+     * @param maxCount      the maximum number of loci next() will return
+     */
+    public LociShard(LocusIterator locusIterator, ReferenceIterator refIterator, List<ReferenceOrderedData.RODIterator> rodIters, int maxCount) {
+        this.locusIterator = locusIterator;
+        this.maxCount = maxCount;
+        this.refIterator = refIterator;
+        this.rodIters = rodIters;
+    }
+
+    /**
+     * @return true if the locus iterator has more data and fewer than maxCount loci have been returned
+     */
+    public boolean hasNext() {
+        return locusIterator.hasNext() && maxCount > iterCount;
+    }
+
+    /**
+     * Advances to the next locus and bundles its context, reference base and reference ordered data.
+     *
+     * @return the datum for the next locus
+     * @throws java.util.NoSuchElementException if maxCount loci have already been returned
+     */
+    public LocusDatum next() {
+        if (iterCount >= maxCount) {
+            throw new java.util.NoSuchElementException("shard limit of " + maxCount + " loci reached");
+        }
+        LocusContext locus = locusIterator.next();
+        // count only loci that were actually obtained, so hasNext() can enforce maxCount
+        ++iterCount;
+        ReferenceIterator refSite = refIterator.seekForward(locus.getLocation());
+        locus.setReferenceContig(refSite.getCurrentContig());
+        // Iterate forward to get all reference ordered data covering this locus
+        final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
+        return new LocusDatum(rodData, refSite.getBaseAsChar(), locus);
+    }
+
+    /** Delegates removal to the locus iterator. */
+    public void remove() {
+        locusIterator.remove();
+    }
+
+    /**
+     * Builds a list of the reference ordered datum at loc from each of the iterators. This function
+     * assumes you are accessing the data in order. You can't use this function for random access. Each
+     * successive call moves you along the file, consuming all data before loc.
+     *
+     * @param rodIters Iterators to access the RODs
+     * @param loc The location to get the rods at
+     * @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list
+     */
+    protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData.RODIterator> rodIters,
+                                                                         final GenomeLoc loc) {
+        List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
+        for (ReferenceOrderedData.RODIterator iter : rodIters) {
+            data.add(iter.seekForward(loc));
+        }
+        return data;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/ReadShard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/ReadShard.java
new file mode 100644
index 000000000..a083ce45d
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/ReadShard.java
@@ -0,0 +1,85 @@
+package org.broadinstitute.sting.gatk.dataSources.chunks;
+
+import edu.mit.broad.picard.sam.MergingSamRecordIterator;
+import org.broadinstitute.sting.gatk.dataSources.datum.ReadDatum;
+import org.broadinstitute.sting.gatk.LocusContext;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.Utils;
+import net.sf.samtools.SAMRecord;
+
+import java.util.List;
+import java.util.Arrays;
+
+/**
+ *
+ * User: aaron
+ * Date: Mar 30, 2009
+ * Time: 5:45:51 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date Mar 30, 2009
+ *

+ * Class ReadShard + *

+ * A read data shard.
+ */
+public class ReadShard implements DataShard {
+
+    // source of the SAM records served by this shard
+    private MergingSamRecordIterator iterator;
+
+    /**
+     * Wraps the given record iterator; this class applies no limit of its own.
+     *
+     * @param samIterator the iterator the shard pulls its reads from
+     */
+    public ReadShard(MergingSamRecordIterator samIterator) {
+        iterator = samIterator;
+    }
+
+    /**
+     * @return true when the wrapped iterator has another read
+     */
+    public boolean hasNext() {
+        return iterator.hasNext();
+    }
+
+    /**
+     * Pulls the next read and packages it, together with a locus context holding
+     * only that read, as a ReadDatum.
+     *
+     * @return the datum for the next read
+     */
+    public ReadDatum next() {
+        final SAMRecord record = iterator.next();
+
+        // the context contains just this one read, and the offset of a single read is always 0
+        final List pile = Arrays.asList(record);
+        final GenomeLoc position = Utils.genomicLocationOf(record);
+        List pileOffsets = Arrays.asList(0);
+
+        return new ReadDatum(record, new LocusContext(position, pile, pileOffsets));
+    }
+
+    /** Delegates removal to the wrapped iterator. */
+    public void remove() {
+        iterator.remove();
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/SAMDataShard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/SAMDataShard.java
new file mode 100644
index 000000000..53b105dcf
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/chunks/SAMDataShard.java
@@ -0,0 +1,48 @@
+package org.broadinstitute.sting.gatk.dataSources.chunks;
+
+import edu.mit.broad.picard.sam.MergingSamRecordIterator;
+import net.sf.samtools.SAMRecord;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: aaronmckenna
+ * Date: Mar 29, 2009
+ * Time: 8:47:50 PM
+ * To change this template use File | Settings | File Templates.
+ */
+
+/**
+ * A shard that hands out SAM records from a merging iterator, optionally capped
+ * at a fixed number of records.
+ */
+public class SAMDataShard implements DataShard {
+
+    // the record stream this shard reads from
+    private final MergingSamRecordIterator iterator;
+
+    // number of records handed out so far
+    private int lengthCount = 0;
+    // maximum number of records this shard may hand out
+    private final int limiter;
+
+    /**
+     * @param iterator the record stream to read from
+     * @param limiter  the maximum number of records next() will return
+     */
+    public SAMDataShard(MergingSamRecordIterator iterator, int limiter) {
+        this.iterator = iterator;
+        this.limiter = limiter;
+    }
+
+    /**
+     * Creates a shard whose limit is Integer.MAX_VALUE records.
+     *
+     * @param iterator the record stream to read from
+     */
+    public SAMDataShard(MergingSamRecordIterator iterator) {
+        this(iterator, Integer.MAX_VALUE);
+    }
+
+    /**
+     * @return true if the stream has another record and the limit has not been reached
+     */
+    public boolean hasNext() {
+        // the shard is open while FEWER than limiter records have been returned
+        return iterator.hasNext() && lengthCount < limiter;
+    }
+
+    /**
+     * @return the next record of the stream
+     * @throws java.util.NoSuchElementException if the record limit has been reached
+     */
+    public SAMRecord next() {
+        if (lengthCount >= limiter) {
+            throw new java.util.NoSuchElementException("shard limit of " + limiter + " records reached");
+        }
+        final SAMRecord record = iterator.next();
+        // count only after the record was actually obtained
+        ++lengthCount;
+        return record;
+    }
+
+    /** Delegates removal to the underlying iterator. */
+    public void remove() {
+        iterator.remove();
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/datum/Datum.java b/java/src/org/broadinstitute/sting/gatk/dataSources/datum/Datum.java
new file mode 100644
index 000000000..6609e830a
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/datum/Datum.java
@@ -0,0 +1,35 @@
+package org.broadinstitute.sting.gatk.dataSources.datum;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+import java.io.Serializable;
+/**
+ *
+ * User: aaron
+ * Date: Mar 30, 2009
+ * Time: 1:32:34 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date Mar 30, 2009
+ *

+ * interface Datum + *

+ * The interface for all Datum Types.
+ */
+public interface Datum extends Serializable {
+
+    /**
+     * Used for tracking where we are in a genome.
+     *
+     * @return the genome location associated with this datum
+     */
+    GenomeLoc getSequenceLocation();
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/datum/LocusDatum.java b/java/src/org/broadinstitute/sting/gatk/dataSources/datum/LocusDatum.java
new file mode 100644
index 000000000..1ab986732
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/datum/LocusDatum.java
@@ -0,0 +1,94 @@
+package org.broadinstitute.sting.gatk.dataSources.datum;
+
+import org.broadinstitute.sting.gatk.LocusContext;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+import java.util.List;
+
+/**
+ *
+ * User: aaron
+ * Date: Mar 30, 2009
+ * Time: 3:08:28 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date Mar 30, 2009
+ *

+ * Class LocusDatum + *

+ * The datum for loci. It contains the reference base, locusContext,
+ * and the reference order data.
+ */
+public class LocusDatum implements Datum {
+
+    // reference ordered data at this locus
+    // (presumably ReferenceOrderedDatum elements, going by this file's imports -- confirm)
+    private final List rodData;
+    // the reference base at this locus
+    private final char ref;
+    // the locus context this datum was built from
+    private final LocusContext context;
+
+    /**
+     * Creates a locus datum from its three parts; the arguments are stored as given.
+     *
+     * @param rodData the reference ordered data for the locus
+     * @param ref     the reference base at the locus
+     * @param context the locus context we're in
+     */
+    public LocusDatum(List rodData, char ref, LocusContext context) {
+        this.context = context;
+        this.ref = ref;
+        this.rodData = rodData;
+    }
+
+    /**
+     * @return the reference ordered data passed to the constructor (the same list, not a copy)
+     */
+    public List getRodData() {
+        return this.rodData;
+    }
+
+    /**
+     * @return a character representing the reference base
+     */
+    public char getRef() {
+        return this.ref;
+    }
+
+    /**
+     * @return the locus context passed to the constructor
+     */
+    public LocusContext getContext() {
+        return this.context;
+    }
+
+    /**
+     * Gets the current position in the sequence, which comes for free
+     * from the underlying locus context.
+     *
+     * @return the location reported by the locus context
+     */
+    public GenomeLoc getSequenceLocation() {
+        return context.getLocation();
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/datum/ReadDatum.java b/java/src/org/broadinstitute/sting/gatk/dataSources/datum/ReadDatum.java
new file mode 100644
index 000000000..5552af496
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/datum/ReadDatum.java
@@ -0,0 +1,65 @@
+package org.broadinstitute.sting.gatk.dataSources.datum;
+
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.LocusContext;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.Utils;
+/**
+ *
+ * User: aaron
+ * Date: Mar 30, 2009
+ * Time: 2:53:37 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + * @date Mar 30, 2009 + *

+ * Class ReadDatum + *

+ * The base read datum class.
+ */
+public class ReadDatum implements Datum {
+
+    // the SAM record this datum wraps
+    private final SAMRecord sam;
+
+    // the locus context that accompanies the read
+    private final LocusContext locus;
+
+    /**
+     * Creates a read datum; both arguments are stored as given.
+     *
+     * @param r     the sam read
+     * @param locus the locus context for the read
+     */
+    public ReadDatum(SAMRecord r, LocusContext locus) {
+        this.locus = locus;
+        this.sam = r;
+    }
+
+    /**
+     * @return the SAMRecord passed to the constructor
+     */
+    public SAMRecord getRead() {
+        return sam;
+    }
+
+    /**
+     * @return the locus context passed to the constructor
+     */
+    public LocusContext getLocus() {
+        return locus;
+    }
+
+    /**
+     * Gets the region that our read spans, as computed by Utils.genomicLocationOf.
+     *
+     * @return a genome loc that details the region that our read spans.
+     */
+    public GenomeLoc getSequenceLocation() {
+        return Utils.genomicLocationOf(this.sam);
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
new file mode 100644
index 000000000..61a913892
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
@@ -0,0 +1,117 @@
+package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
+
+import edu.mit.broad.picard.io.IoUtil;
+import edu.mit.broad.picard.sam.MergingSamRecordIterator;
+import edu.mit.broad.picard.sam.SamFileHeaderMerger;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileWriter;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * User: aaron
+ * Date: Mar 26, 2009
+ * Time: 2:36:16 PM
+ *

+ * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+public class SAMDataSource implements SimpleDataSource {
+    /** the sort order the SAM data files are expected to have */
+    private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
+
+    // our sam file readers, one per input file
+    private final ArrayList<SAMFileReader> readers = new ArrayList<SAMFileReader>();
+
+    // true while every file opened so far has the expected sort order
+    private boolean matchedSortOrders = true;
+
+    // our record iterator, we use it to iterate over all the reads
+    private MergingSamRecordIterator iterator = null;
+
+    // we may want to write out the file
+    private SAMFileWriter out = null;
+
+    // are we set to locus mode (true) or read mode (false) for dividing
+    private boolean locusMode = true;
+
+    /**
+     * constructor for multiple sam files
+     *
+     * @param samfiles the paths of the SAM files to load
+     * @throws FileNotFoundException if any of the paths does not exist
+     */
+    public SAMDataSource(ArrayList<String> samfiles) throws FileNotFoundException {
+        loadFiles(samfiles);
+    }
+
+    /**
+     * Checks that every path exists, opens a reader per file, and sets up the merging
+     * iterator over all of them.
+     *
+     * @param samfiles the paths of the SAM files to load
+     * @throws FileNotFoundException if any of the paths does not exist
+     */
+    private void loadFiles(ArrayList<String> samfiles) throws FileNotFoundException {
+        // verify the whole list first, so nothing is opened when an input is missing
+        List<File> inputs = new ArrayList<File>();
+        for (String check : samfiles) {
+            File nf = new File(check);
+            if (!nf.exists()) {
+                throw new FileNotFoundException(check + " doesn't exist");
+            }
+            // keep the verified file; without this the loop below has nothing to open
+            inputs.add(nf);
+        }
+
+        // Open the files for reading; the readers go into the field, not a shadowing local
+        for (File inFile : inputs) {
+            IoUtil.assertFileIsReadable(inFile);
+            SAMFileReader in = new SAMFileReader(inFile);
+            readers.add(in);
+            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
+        }
+
+        // If all the input sort orders match the expected sort order then merge with that order,
+        // otherwise merge as unsorted
+        if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted) {
+            SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER);
+            iterator = new MergingSamRecordIterator(headerMerger);
+        } else {
+            SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SAMFileHeader.SortOrder.unsorted);
+            iterator = new MergingSamRecordIterator(headerMerger);
+            // NOTE(review): the merged header is tagged with SORT_ORDER, but this class does not
+            // sort the records itself -- confirm that this is intended
+            SAMFileHeader header = headerMerger.getMergedHeader();
+            header.setSortOrder(SORT_ORDER);
+        }
+    }
+
+    /**
+     * constructor, given a single sam file
+     *
+     * @param samFile the path of the SAM file to load
+     * @throws FileNotFoundException if the path does not exist
+     */
+    public SAMDataSource(String samFile) throws FileNotFoundException {
+        ArrayList<String> samfiles = new ArrayList<String>();
+        samfiles.add(samFile);
+        loadFiles(samfiles);
+    }
+
+    /**
+     * Chunk the sam file at appropriate locations, given the chunk count.
+     * Not implemented yet: the call currently does nothing.
+     *
+     * @param chunkCount the recommended number of chunks
+     */
+    public void chunk(int chunkCount) {
+
+    }
+
+    /** set this source to divide on reads */
+    public void setToReadMode() {
+        // read mode is the opposite of locus mode, which is the default
+        locusMode = false;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SimpleDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SimpleDataSource.java
new file mode 100644
index 000000000..ea2916159
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SimpleDataSource.java
@@ -0,0 +1,30 @@
+package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
+
+import java.io.Serializable;
+
+/**
+ * User: aaron
+ * Date: Mar 26, 2009
+ * Time: 2:39:05 PM
+ *

+ * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+public interface SimpleDataSource extends Serializable {
+
+    /**
+     * Recommends how many data chunks the file should be broken into.
+     * The number is only a recommendation: whether or not it is specified,
+     * the chunking data source can make decisions to chunk differently.
+     *
+     * @param chunkCount the recommended number of chunks
+     */
+    void chunk(int chunkCount);
+
+
+}