removing some files from the old approach to dataSource
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@303 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5feb7ee627
commit
0fc8a90553
|
|
@ -1,21 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.DataShard;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 25, 2009
|
||||
* Time: 6:20:00 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public interface DataSource {
|
||||
|
||||
public DataShard toChunk(int chunkCount);
|
||||
}
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 25, 2009
|
||||
* Time: 4:51:39 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class DataSourceBuilder {
|
||||
|
||||
// storage for the passed file
|
||||
ArrayList<File> passFiles = new ArrayList<File>();
|
||||
|
||||
public DataSourceBuilder() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* add a file used to generate the data sources
|
||||
*
|
||||
* @param fileName the filename that should be used
|
||||
*/
|
||||
public void addDataFile(String fileName) {
|
||||
// for now, just add it to the internal file list
|
||||
passFiles.add(new File(fileName));
|
||||
}
|
||||
|
||||
/**
|
||||
* add a file used to generate the data sources
|
||||
*
|
||||
* @param file the filename that should be used
|
||||
*/
|
||||
public void addDataFile(File file) {
|
||||
// for now, just add it to the internal file list
|
||||
passFiles.add(file);
|
||||
}
|
||||
|
||||
public DataSource build(Walker inputWalker) {
|
||||
if (inputWalker instanceof ReadWalker) {
|
||||
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this section contains the private methods to create data sources
|
||||
* based on the type of walker we're passed in.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* we know we have a read data source, let's get the
|
||||
* @return
|
||||
*/
|
||||
//private ReadDataSource generateReadDataSource() {
|
||||
//
|
||||
//}
|
||||
|
||||
}
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.datum;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.Serializable;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 1:32:34 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* interface Datum
|
||||
* <p/>
|
||||
* The interface for all Datum Types.
|
||||
*/
|
||||
public interface Datum extends Serializable {
|
||||
|
||||
// this function is used for tracking where we are in a genome
|
||||
public GenomeLoc getSequenceLocation();
|
||||
}
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.datum;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 3:08:28 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class LocusDatum
|
||||
* <p/>
|
||||
* The datum for loci. It contains the reference base, locusContext,
|
||||
* and the reference order data.
|
||||
*/
|
||||
public class LocusDatum implements Datum {
|
||||
|
||||
// our reference order data
|
||||
private final List<ReferenceOrderedDatum> rodData;
|
||||
// our seq base
|
||||
private final char ref;
|
||||
// our locus context
|
||||
private final LocusContext context;
|
||||
|
||||
/**
|
||||
* the locus dataum constructor
|
||||
*
|
||||
* @param rodData our reference data
|
||||
* @param ref our reference sequence base position
|
||||
* @param context the genome context we're in
|
||||
*/
|
||||
public LocusDatum(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||
this.rodData = rodData;
|
||||
this.ref = ref;
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the Reference order data for this position
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<ReferenceOrderedDatum> getRodData() {
|
||||
return rodData;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the reference base
|
||||
*
|
||||
* @return a character representing the reference base
|
||||
*/
|
||||
public char getRef() {
|
||||
return ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the locus context at the current position
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public LocusContext getContext() {
|
||||
return context;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the current postion in the sequence, which comes
|
||||
* free from underlying data types
|
||||
*
|
||||
* @return our current GenomeLocation
|
||||
*/
|
||||
public GenomeLoc getSequenceLocation() {
|
||||
return this.context.getLocation();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.datum;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 2:53:37 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class ReadDatum
|
||||
* <p/>
|
||||
* The base read datum class.
|
||||
*/
|
||||
public class ReadDatum implements Datum {
|
||||
|
||||
// our SAM record
|
||||
final private SAMRecord sam;
|
||||
|
||||
// our locus context
|
||||
final private LocusContext locus;
|
||||
|
||||
// the constructor, taking a sam read and a locus
|
||||
public ReadDatum(SAMRecord r, LocusContext locus) {
|
||||
this.sam = r;
|
||||
this.locus = locus;
|
||||
}
|
||||
|
||||
// get the SAMRecord
|
||||
public SAMRecord getRead() {
|
||||
return this.sam;
|
||||
}
|
||||
|
||||
// get the locus context
|
||||
public LocusContext getLocus() {
|
||||
return this.locus;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the region that our read spans
|
||||
*
|
||||
* @return a genome loc that details the region that our read spans.
|
||||
*/
|
||||
public GenomeLoc getSequenceLocation() {
|
||||
return GenomeLoc.genomicLocationOf(sam);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaronmckenna
|
||||
* Date: Mar 29, 2009
|
||||
* Time: 8:35:16 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class BasicDataShard<T> implements DataShard {
|
||||
|
||||
List<T> list = new ArrayList<T>();
|
||||
int index = 0;
|
||||
|
||||
public BasicDataShard(List<T> list) {
|
||||
this.list = list;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
if (list.size() > index) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public T next() {
|
||||
return list.get(index);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
list.remove(index);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 2:43:04 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
/**
 * Marker type for a shard of data: an {@link Iterator} over the elements of the
 * shard. It declares no methods of its own.
 */
public interface DataShard extends Iterator {
}
|
||||
|
|
@ -1,99 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.dataSources.datum.LocusDatum;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 7:01:56 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class LociShard
|
||||
* <p/>
|
||||
* This is the loci shard, which are collectively made when a shatter call is made to
|
||||
* a data source.
|
||||
*/
|
||||
public class LociShard implements DataShard {
|
||||
|
||||
// our locusIterator
|
||||
private final LocusIterator locusIterator;
|
||||
|
||||
// our reference locusIterator
|
||||
private final ReferenceIterator refIterator;
|
||||
|
||||
// Iterator over rods
|
||||
private final List<ReferenceOrderedData.RODIterator> rodIters;
|
||||
|
||||
// the max number of iterations
|
||||
private final int maxCount;
|
||||
|
||||
// how many iterations we've had
|
||||
private int iterCount = 0;
|
||||
|
||||
public LociShard(LocusIterator locusIterator, ReferenceIterator refIterator, List<ReferenceOrderedData.RODIterator> rodIters, int maxCount) {
|
||||
this.locusIterator = locusIterator;
|
||||
this.maxCount = maxCount;
|
||||
this.refIterator = refIterator;
|
||||
this.rodIters = rodIters;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return locusIterator.hasNext() && maxCount > iterCount;
|
||||
}
|
||||
|
||||
public LocusDatum next() {
|
||||
LocusContext locus = locusIterator.next();
|
||||
ReferenceIterator refSite = refIterator.seekForward(locus.getLocation());
|
||||
locus.setReferenceContig(refSite.getCurrentContig());
|
||||
// Iterate forward to get all reference ordered data covering this locus
|
||||
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
|
||||
return new LocusDatum(rodData, refSite.getBaseAsChar(), locus);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
locusIterator.remove();
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a list of the reference ordered datum at loc from each of the iterators. This function
|
||||
* assumes you are accessing the data in order. You can't use this function for random access. Each
|
||||
* successive call moves you along the file, consuming all data before loc.
|
||||
*
|
||||
* @param rodIters Iterators to access the RODs
|
||||
* @param loc The location to get the rods at
|
||||
* @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list
|
||||
*/
|
||||
protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData.RODIterator> rodIters,
|
||||
final GenomeLoc loc) {
|
||||
List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
|
||||
for (ReferenceOrderedData.RODIterator iter : rodIters) {
|
||||
data.add(iter.seekForward(loc));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import edu.mit.broad.picard.sam.MergingSamRecordIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.datum.ReadDatum;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 5:45:51 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class ReadShard
|
||||
* <p/>
|
||||
* A read data shard.
|
||||
*/
|
||||
public class ReadShard implements DataShard {
|
||||
|
||||
private MergingSamRecordIterator iterator;
|
||||
|
||||
/**
|
||||
* create the data chunk with an iterator, and a limiter
|
||||
*
|
||||
* @param samIterator
|
||||
*/
|
||||
public ReadShard(MergingSamRecordIterator samIterator) {
|
||||
this.iterator = samIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have a next data point
|
||||
*
|
||||
* @return true if we have a data point
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
public ReadDatum next() {
|
||||
// get the read
|
||||
final SAMRecord read = iterator.next();
|
||||
|
||||
// put the read into a list
|
||||
final List<SAMRecord> reads = Arrays.asList(read);
|
||||
|
||||
// put together the genome location
|
||||
final GenomeLoc loc = GenomeLoc.genomicLocationOf(read);
|
||||
|
||||
// Offset of a single read is always 0
|
||||
List<Integer> offsets = Arrays.asList(0);
|
||||
|
||||
// create the locus
|
||||
final LocusContext locus = new LocusContext(loc, reads, offsets);
|
||||
|
||||
// return the read datum
|
||||
return new ReadDatum(read, locus);
|
||||
}
|
||||
|
||||
/** remove the current pointed to data source */
|
||||
public void remove() {
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import edu.mit.broad.picard.sam.MergingSamRecordIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaronmckenna
|
||||
* Date: Mar 29, 2009
|
||||
* Time: 8:47:50 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SAMDataShard implements DataShard {
|
||||
|
||||
// our iterator
|
||||
final private MergingSamRecordIterator iterator;
|
||||
|
||||
// divide by reads or by loci
|
||||
private boolean byReads = true;
|
||||
|
||||
// iterator bounds limiter
|
||||
private int lengthCount = 0;
|
||||
private final int limiter;
|
||||
|
||||
public SAMDataShard(MergingSamRecordIterator iterator, int limiter) {
|
||||
this.iterator = iterator;
|
||||
this.limiter = limiter;
|
||||
}
|
||||
|
||||
public SAMDataShard(MergingSamRecordIterator iterator) {
|
||||
this.iterator = iterator;
|
||||
limiter = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext() && lengthCount > limiter;
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
++lengthCount;
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue