Checking in some of the more static Data Source dependent code at this point. These classes don't do much on their own, but they are needed for the base data source code I'm writing.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@231 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7fda409f4e
commit
ba99e9f648
|
|
@ -0,0 +1,21 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.dataSources.chunks.DataShard;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 25, 2009
|
||||
* Time: 6:20:00 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public interface DataSource {
|
||||
|
||||
public DataShard toChunk(int chunkCount);
|
||||
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 25, 2009
|
||||
* Time: 4:51:39 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class DataSourceBuilder {
|
||||
|
||||
// storage for the passed file
|
||||
ArrayList<File> passFiles = new ArrayList<File>();
|
||||
|
||||
public DataSourceBuilder() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* add a file used to generate the data sources
|
||||
*
|
||||
* @param fileName the filename that should be used
|
||||
*/
|
||||
public void addDataFile(String fileName) {
|
||||
// for now, just add it to the internal file list
|
||||
passFiles.add(new File(fileName));
|
||||
}
|
||||
|
||||
/**
|
||||
* add a file used to generate the data sources
|
||||
*
|
||||
* @param file the filename that should be used
|
||||
*/
|
||||
public void addDataFile(File file) {
|
||||
// for now, just add it to the internal file list
|
||||
passFiles.add(file);
|
||||
}
|
||||
|
||||
public DataSource build(Walker inputWalker) {
|
||||
if (inputWalker instanceof ReadWalker) {
|
||||
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this section contains the private methods to create data sources
|
||||
* based on the type of walker we're passed in.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* we know we have a read data source, let's get the
|
||||
* @return
|
||||
*/
|
||||
//private ReadDataSource generateReadDataSource() {
|
||||
//
|
||||
//}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 9:25:49 AM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
|
||||
/**
 * This exception is thrown when we're unable to generate a data source,
 * most likely due to an incomplete input source list.
 */
public class DataSourceGenerationException extends Exception {

    /** Creates an exception with no detail message and no cause. */
    public DataSourceGenerationException() {
        super();
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of why the data source could not be generated
     */
    public DataSourceGenerationException(String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and the underlying cause.
     *
     * @param message description of why the data source could not be generated
     * @param cause   the failure that prevented generation; preserved for the stack trace
     */
    public DataSourceGenerationException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps the underlying cause.
     *
     * @param cause the failure that prevented generation
     */
    public DataSourceGenerationException(Throwable cause) {
        super(cause);
    }
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.io.File;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.VerifyingSamIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 10:35:40 AM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class ReadDataSource {
|
||||
|
||||
/**
|
||||
* our SAM data files
|
||||
*/
|
||||
// our SAM reader
|
||||
private SAMFileReader samReader = null;
|
||||
// iterator over the sam records in the readsFile
|
||||
private Iterator<SAMRecord> samReadIter = null;
|
||||
|
||||
// The verifying iterator, it does checking
|
||||
VerifyingSamIterator verifyingSamReadIter = null;
|
||||
|
||||
|
||||
/**
|
||||
* our reference data source
|
||||
*/
|
||||
// The reference data -- filename, refSeqFile, and iterator
|
||||
private File refFileName = null; // the name of the reference file
|
||||
//private ReferenceSequenceFile refFile = null;
|
||||
private FastaSequenceFile2 refFile = null; // todo: merge FastaSequenceFile2 into picard!
|
||||
private ReferenceIterator refIter = null;
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.chunks;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaronmckenna
|
||||
* Date: Mar 29, 2009
|
||||
* Time: 8:35:16 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class BasicDataShard<T> implements DataShard {
|
||||
|
||||
List<T> list = new ArrayList<T>();
|
||||
int index = 0;
|
||||
|
||||
public BasicDataShard(List<T> list) {
|
||||
this.list = list;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
if (list.size() > index) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public T next() {
|
||||
return list.get(index);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
list.remove(index);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.chunks;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 2:43:04 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
/**
 * Common type for all data shards. A shard is consumed through the standard
 * {@link Iterator} operations and adds no methods of its own.
 */
public interface DataShard extends Iterator {
    // marker interface: the shard contract is exactly hasNext() / next() / remove()
}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.chunks;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.dataSources.datum.LocusDatum;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 7:01:56 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class LociShard
|
||||
* <p/>
|
||||
* This is the loci shard, which are collectively made when a shatter call is made to
|
||||
* a data source.
|
||||
*/
|
||||
public class LociShard implements DataShard {
|
||||
|
||||
// our locusIterator
|
||||
private final LocusIterator locusIterator;
|
||||
|
||||
// our reference locusIterator
|
||||
private final ReferenceIterator refIterator;
|
||||
|
||||
// Iterator over rods
|
||||
private final List<ReferenceOrderedData.RODIterator> rodIters;
|
||||
|
||||
// the max number of iterations
|
||||
private final int maxCount;
|
||||
|
||||
// how many iterations we've had
|
||||
private int iterCount = 0;
|
||||
|
||||
public LociShard(LocusIterator locusIterator, ReferenceIterator refIterator, List<ReferenceOrderedData.RODIterator> rodIters, int maxCount) {
|
||||
this.locusIterator = locusIterator;
|
||||
this.maxCount = maxCount;
|
||||
this.refIterator = refIterator;
|
||||
this.rodIters = rodIters;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return locusIterator.hasNext() && maxCount > iterCount;
|
||||
}
|
||||
|
||||
public LocusDatum next() {
|
||||
LocusContext locus = locusIterator.next();
|
||||
ReferenceIterator refSite = refIterator.seekForward(locus.getLocation());
|
||||
locus.setReferenceContig(refSite.getCurrentContig());
|
||||
// Iterate forward to get all reference ordered data covering this locus
|
||||
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
|
||||
return new LocusDatum(rodData, refSite.getBaseAsChar(), locus);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
locusIterator.remove();
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a list of the reference ordered datum at loc from each of the iterators. This function
|
||||
* assumes you are accessing the data in order. You can't use this function for random access. Each
|
||||
* successive call moves you along the file, consuming all data before loc.
|
||||
*
|
||||
* @param rodIters Iterators to access the RODs
|
||||
* @param loc The location to get the rods at
|
||||
* @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list
|
||||
*/
|
||||
protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData.RODIterator> rodIters,
|
||||
final GenomeLoc loc) {
|
||||
List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
|
||||
for (ReferenceOrderedData.RODIterator iter : rodIters) {
|
||||
data.add(iter.seekForward(loc));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.chunks;
|
||||
|
||||
import edu.mit.broad.picard.sam.MergingSamRecordIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.datum.ReadDatum;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 5:45:51 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class ReadShard
|
||||
* <p/>
|
||||
* A read data shard.
|
||||
*/
|
||||
public class ReadShard implements DataShard {
|
||||
|
||||
private MergingSamRecordIterator iterator;
|
||||
|
||||
/**
|
||||
* create the data chunk with an iterator, and a limiter
|
||||
*
|
||||
* @param samIterator
|
||||
*/
|
||||
public ReadShard(MergingSamRecordIterator samIterator) {
|
||||
this.iterator = samIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have a next data point
|
||||
*
|
||||
* @return true if we have a data point
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
public ReadDatum next() {
|
||||
// get the read
|
||||
final SAMRecord read = iterator.next();
|
||||
|
||||
// put the read into a list
|
||||
final List<SAMRecord> reads = Arrays.asList(read);
|
||||
|
||||
// put together the genome location
|
||||
final GenomeLoc loc = Utils.genomicLocationOf(read);
|
||||
|
||||
// Offset of a single read is always 0
|
||||
List<Integer> offsets = Arrays.asList(0);
|
||||
|
||||
// create the locus
|
||||
final LocusContext locus = new LocusContext(loc, reads, offsets);
|
||||
|
||||
// return the read datum
|
||||
return new ReadDatum(read, locus);
|
||||
}
|
||||
|
||||
/** remove the current pointed to data source */
|
||||
public void remove() {
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.chunks;
|
||||
|
||||
import edu.mit.broad.picard.sam.MergingSamRecordIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaronmckenna
|
||||
* Date: Mar 29, 2009
|
||||
* Time: 8:47:50 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SAMDataShard implements DataShard {
|
||||
|
||||
// our iterator
|
||||
final private MergingSamRecordIterator iterator;
|
||||
|
||||
// divide by reads or by loci
|
||||
private boolean byReads = true;
|
||||
|
||||
// iterator bounds limiter
|
||||
private int lengthCount = 0;
|
||||
private final int limiter;
|
||||
|
||||
public SAMDataShard(MergingSamRecordIterator iterator, int limiter) {
|
||||
this.iterator = iterator;
|
||||
this.limiter = limiter;
|
||||
}
|
||||
|
||||
public SAMDataShard(MergingSamRecordIterator iterator) {
|
||||
this.iterator = iterator;
|
||||
limiter = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext() && lengthCount > limiter;
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
++lengthCount;
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.datum;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.Serializable;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 1:32:34 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* interface Datum
|
||||
* <p/>
|
||||
* The interface for all Datum Types.
|
||||
*/
|
||||
public interface Datum extends Serializable {
|
||||
|
||||
// this function is used for tracking where we are in a genome
|
||||
public GenomeLoc getSequenceLocation();
|
||||
}
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.datum;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 3:08:28 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class LocusDatum
|
||||
* <p/>
|
||||
* The datum for loci. It contains the reference base, locusContext,
|
||||
* and the reference order data.
|
||||
*/
|
||||
public class LocusDatum implements Datum {
|
||||
|
||||
// our reference order data
|
||||
private final List<ReferenceOrderedDatum> rodData;
|
||||
// our seq base
|
||||
private final char ref;
|
||||
// our locus context
|
||||
private final LocusContext context;
|
||||
|
||||
/**
|
||||
* the locus dataum constructor
|
||||
*
|
||||
* @param rodData our reference data
|
||||
* @param ref our reference sequence base position
|
||||
* @param context the genome context we're in
|
||||
*/
|
||||
public LocusDatum(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||
this.rodData = rodData;
|
||||
this.ref = ref;
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the Reference order data for this position
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<ReferenceOrderedDatum> getRodData() {
|
||||
return rodData;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the reference base
|
||||
*
|
||||
* @return a character representing the reference base
|
||||
*/
|
||||
public char getRef() {
|
||||
return ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the locus context at the current position
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public LocusContext getContext() {
|
||||
return context;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the current postion in the sequence, which comes
|
||||
* free from underlying data types
|
||||
*
|
||||
* @return our current GenomeLocation
|
||||
*/
|
||||
public GenomeLoc getSequenceLocation() {
|
||||
return this.context.getLocation();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.datum;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Mar 30, 2009
|
||||
* Time: 2:53:37 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Mar 30, 2009
|
||||
* <p/>
|
||||
* Class ReadDatum
|
||||
* <p/>
|
||||
* The base read datum class.
|
||||
*/
|
||||
public class ReadDatum implements Datum {
|
||||
|
||||
// our SAM record
|
||||
final private SAMRecord sam;
|
||||
|
||||
// our locus context
|
||||
final private LocusContext locus;
|
||||
|
||||
// the constructor, taking a sam read and a locus
|
||||
public ReadDatum(SAMRecord r, LocusContext locus) {
|
||||
this.sam = r;
|
||||
this.locus = locus;
|
||||
}
|
||||
|
||||
// get the SAMRecord
|
||||
public SAMRecord getRead() {
|
||||
return this.sam;
|
||||
}
|
||||
|
||||
// get the locus context
|
||||
public LocusContext getLocus() {
|
||||
return this.locus;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the region that our read spans
|
||||
*
|
||||
* @return a genome loc that details the region that our read spans.
|
||||
*/
|
||||
public GenomeLoc getSequenceLocation() {
|
||||
return Utils.genomicLocationOf(sam);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import edu.mit.broad.picard.io.IoUtil;
|
||||
import edu.mit.broad.picard.sam.MergingSamRecordIterator;
|
||||
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 2:36:16 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class SAMDataSource implements SimpleDataSource {
|
||||
/** our SAM data files */
|
||||
private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
||||
|
||||
// our sam file readers
|
||||
private final ArrayList<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
||||
|
||||
// do we care that the SAM files respect the sort order.
|
||||
private boolean matchedSortOrders = true;
|
||||
|
||||
// our record iterator, we use it to iterate over all the reads
|
||||
private MergingSamRecordIterator iterator = null;
|
||||
|
||||
// we may want to write out the file
|
||||
private SAMFileWriter out = null;
|
||||
|
||||
// are we set to locus mode or read mode for dividing
|
||||
private boolean locusMode = true;
|
||||
|
||||
/**
|
||||
* constructor for multiple sam files
|
||||
*
|
||||
* @param samfiles
|
||||
*/
|
||||
public SAMDataSource(ArrayList<String> samfiles) throws FileNotFoundException {
|
||||
loadFiles(samfiles);
|
||||
}
|
||||
|
||||
private void loadFiles(ArrayList<String> samfiles) throws FileNotFoundException {
|
||||
// verify the list passed to the class
|
||||
ArrayList<File> INPUT = new ArrayList<File>();
|
||||
for (String check : samfiles) {
|
||||
File nf = new File(check);
|
||||
if (!nf.exists()) {
|
||||
throw new FileNotFoundException(check + " doesn't exist");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Open the files for reading and writing
|
||||
|
||||
List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
||||
for (File inFile : INPUT) {
|
||||
IoUtil.assertFileIsReadable(inFile);
|
||||
SAMFileReader in = new SAMFileReader(inFile);
|
||||
readers.add(in);
|
||||
matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
|
||||
}
|
||||
|
||||
// If all the input sort orders match the output sort order then just merge them and
|
||||
// write on the fly, otherwise setup to merge and sort before writing out the final file
|
||||
if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted) {
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER);
|
||||
iterator = new MergingSamRecordIterator(headerMerger);
|
||||
|
||||
} else {
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SAMFileHeader.SortOrder.unsorted);
|
||||
iterator = new MergingSamRecordIterator(headerMerger);
|
||||
SAMFileHeader header = headerMerger.getMergedHeader();
|
||||
header.setSortOrder(SORT_ORDER);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* constructor, given a single sam file
|
||||
*
|
||||
* @param samFile
|
||||
*/
|
||||
public SAMDataSource(String samFile) throws FileNotFoundException {
|
||||
ArrayList<String> samfiles = new ArrayList<String>();
|
||||
samfiles.add(samFile);
|
||||
loadFiles(samfiles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk the sam file at appropriate locations, given the chunk count
|
||||
*
|
||||
* @param chunkCount
|
||||
* @return
|
||||
*/
|
||||
public void chunk(int chunkCount) {
|
||||
|
||||
}
|
||||
|
||||
/** set this source to divide on reads */
|
||||
public void setToReadMode() {
|
||||
locusMode = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 2:39:05 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
/**
 * A serializable data source that can be asked to divide itself into chunks.
 */
public interface SimpleDataSource extends Serializable {

    /**
     * Recommends how many data chunks the source should be broken into. The number is
     * only a recommendation: whether or not it is specified, the chunking data source
     * can decide to chunk differently.
     *
     * @param chunkCount the recommended number of chunks
     */
    void chunk(int chunkCount);
}
|
||||
Loading…
Reference in New Issue