A few minor modifications masquerading as significant changes according to
svn's logs: - Copied BAM indexing engine from Picard back into the GATK anticipating shard merging algorithm. Tried to leave most of the building blocks in Picard. If this turns into a logistical nightmare, I'll merge the building blocks into the GATK as well. - Reorganized the org.broadinstitute.sting.gatk.datasources package, giving better separation of query and management functionality for reads, ref, rmd, and samples. - Merged Shard building blocks into org.broadinstitute.sting.gatk.datasources.reads package, indicating its current strong relationship with the reads, rather than the general unifying element I wish this would be. - Collapsed BAMFormatAwareShard into Shard. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5184 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7af003666d
commit
5c3198520c
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package net.sf.picard.reference;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package net.sf.samtools;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A temporary solution to work around Java access rights issues:
|
||||
* override BAMFileSpan and make it public.
|
||||
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
|
||||
*/
|
||||
public class GATKBAMFileSpan extends BAMFileSpan {
|
||||
/**
|
||||
* Create a new empty list of chunks.
|
||||
*/
|
||||
public GATKBAMFileSpan() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor to construct a BAM file span from
|
||||
* a single chunk.
|
||||
* @param chunk Chunk to use as the sole region in this span.
|
||||
*/
|
||||
public GATKBAMFileSpan(final Chunk chunk) {
|
||||
super(chunk);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new chunk list from the given list of chunks.
|
||||
* @param chunks Constituent chunks.
|
||||
*/
|
||||
public GATKBAMFileSpan(final List<GATKChunk> chunks) {
|
||||
super(new ArrayList<Chunk>(chunks));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package net.sf.samtools;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A temporary solution to work around Java access rights issues:
|
||||
* override GATKBin and make it public.
|
||||
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
|
||||
*/
|
||||
public class GATKBin extends Bin {
|
||||
public GATKBin(final int referenceSequence, final int binNumber) {
|
||||
super(referenceSequence,binNumber);
|
||||
}
|
||||
|
||||
public GATKBin(final Bin bin) {
|
||||
super(bin.getReferenceSequence(),bin.getBinNumber());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getReferenceSequence() {
|
||||
return super.getReferenceSequence();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBinNumber() {
|
||||
return super.getBinNumber();
|
||||
}
|
||||
|
||||
public List<GATKChunk> getGATKChunkList() {
|
||||
List<GATKChunk> gatkChunks = new ArrayList<GATKChunk>();
|
||||
for(Chunk chunk: getChunkList())
|
||||
gatkChunks.add(new GATKChunk(chunk));
|
||||
return gatkChunks;
|
||||
}
|
||||
|
||||
public void setGATKChunkList(List<GATKChunk> chunks) {
|
||||
super.setChunkList(new ArrayList<Chunk>(chunks));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package net.sf.samtools;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A temporary solution to work around Java access rights issues:
|
||||
* override chunk and make it public.
|
||||
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
|
||||
*/
|
||||
public class GATKBinList extends BinList {
|
||||
/**
|
||||
* Create a new BinList over sequenceCount sequences, consisting of the given bins.
|
||||
* @param referenceSequence Reference sequence to which these bins are relevant.
|
||||
* @param bins The given bins to include.
|
||||
*/
|
||||
public GATKBinList(final int referenceSequence, final BitSet bins) {
|
||||
super(referenceSequence,bins);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the bins stored in this list.
|
||||
* @return A bitset where a bin is present in the list if the bit is true.
|
||||
*/
|
||||
public BitSet getBins() {
|
||||
return super.getBins();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package net.sf.samtools;
|
||||
|
||||
/**
|
||||
* A temporary solution to work around Java access rights issues:
|
||||
* override chunk and make it public.
|
||||
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
|
||||
*/
|
||||
public class GATKChunk extends Chunk {
|
||||
public GATKChunk(final long start, final long stop) {
|
||||
super(start,stop);
|
||||
}
|
||||
|
||||
public GATKChunk(final Chunk chunk) {
|
||||
super(chunk.getChunkStart(),chunk.getChunkEnd());
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKChunk clone() {
|
||||
return new GATKChunk(getChunkStart(),getChunkEnd());
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getChunkStart() {
|
||||
return super.getChunkStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setChunkStart(final long value) {
|
||||
super.setChunkStart(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getChunkEnd() {
|
||||
return super.getChunkEnd();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setChunkEnd(final long value) {
|
||||
super.setChunkEnd(value);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
|
|||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
|
|
|
|||
|
|
@ -36,16 +36,16 @@ import org.broadinstitute.sting.commandline.CommandLineUtils;
|
|||
import org.broadinstitute.sting.commandline.ParsingEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.MonolithicShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
||||
|
|
@ -66,10 +66,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -5,10 +5,9 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
/**
|
||||
* User: hanna
|
||||
|
|
|
|||
|
|
@ -27,9 +27,8 @@ package org.broadinstitute.sting.gatk;
|
|||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 9:25:49 AM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This exception is throw when we're unable to generate a data source,
|
||||
* most likely due to an incomplete input source list
|
||||
*/
|
||||
public class DataSourceGenerationException extends ReviewedStingException {
|
||||
public DataSourceGenerationException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public DataSourceGenerationException(String message, Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,14 +25,12 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
|
|
|
|||
|
|
@ -26,13 +26,12 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.RODMergingIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,195 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.GATKBin;
|
||||
import net.sf.samtools.GATKChunk;
|
||||
import net.sf.samtools.LinearIndex;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Represents the contents of a bam index file for one reference.
|
||||
* A BAM index (.bai) file contains information for all references in the bam file.
|
||||
* This class describes the data present in the index file for one of these references;
|
||||
* including the bins, chunks, and linear index.
|
||||
*/
|
||||
class BAMIndexContent {
|
||||
/**
|
||||
* The reference sequence for the data currently loaded.
|
||||
*/
|
||||
private final int mReferenceSequence;
|
||||
|
||||
/**
|
||||
* A list of all bins in the above reference sequence.
|
||||
*/
|
||||
private final BinList mBinList;
|
||||
|
||||
/**
|
||||
* The linear index for the reference sequence above.
|
||||
*/
|
||||
private final LinearIndex mLinearIndex;
|
||||
|
||||
|
||||
/**
|
||||
* @param referenceSequence Content corresponds to this reference.
|
||||
* @param bins Array of bins represented by this content, possibly sparse
|
||||
* @param numberOfBins Number of non-null bins
|
||||
* @param linearIndex Additional index used to optimize queries
|
||||
*/
|
||||
BAMIndexContent(final int referenceSequence, final GATKBin[] bins, final int numberOfBins, final LinearIndex linearIndex) {
|
||||
this.mReferenceSequence = referenceSequence;
|
||||
this.mBinList = new BinList(bins, numberOfBins);
|
||||
this.mLinearIndex = linearIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reference for this Content
|
||||
*/
|
||||
public int getReferenceSequence() {
|
||||
return mReferenceSequence;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this content have anything in this bin?
|
||||
*/
|
||||
public boolean containsBin(final GATKBin bin) {
|
||||
return mBinList.getBin(bin.getBinNumber()) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return iterable list of bins represented by this content
|
||||
*/
|
||||
public BinList getBins() {
|
||||
return mBinList;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of non-null bins represented by this content
|
||||
*/
|
||||
int getNumberOfNonNullBins() {
|
||||
return mBinList.getNumberOfNonNullBins();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return all chunks associated with all bins in this content
|
||||
*/
|
||||
public List<GATKChunk> getAllChunks() {
|
||||
List<GATKChunk> allChunks = new ArrayList<GATKChunk>();
|
||||
for (GATKBin b : mBinList)
|
||||
if (b.getChunkList() != null) {
|
||||
allChunks.addAll(b.getGATKChunkList());
|
||||
}
|
||||
return Collections.unmodifiableList(allChunks);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the linear index represented by this content
|
||||
*/
|
||||
public LinearIndex getLinearIndex() {
|
||||
return mLinearIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* This class is used to encapsulate the list of Bins store in the BAMIndexContent
|
||||
* While it is currently represented as an array, we may decide to change it to an ArrayList or other structure
|
||||
*/
|
||||
class BinList implements Iterable<GATKBin> {
|
||||
|
||||
private final GATKBin[] mBinArray;
|
||||
public final int numberOfNonNullBins;
|
||||
public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based
|
||||
|
||||
/**
|
||||
* @param binArray a sparse array representation of the bins. The index into the array is the bin number.
|
||||
* @param numberOfNonNullBins
|
||||
*/
|
||||
BinList(GATKBin[] binArray, int numberOfNonNullBins) {
|
||||
this.mBinArray = binArray;
|
||||
this.numberOfNonNullBins = numberOfNonNullBins;
|
||||
this.maxBinNumber = mBinArray.length - 1;
|
||||
}
|
||||
|
||||
GATKBin getBin(int binNumber) {
|
||||
if (binNumber > maxBinNumber) return null;
|
||||
return mBinArray[binNumber];
|
||||
}
|
||||
|
||||
int getNumberOfNonNullBins() {
|
||||
return numberOfNonNullBins;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an iterator over all non-null bins.
|
||||
*
|
||||
* @return An iterator over all bins.
|
||||
*/
|
||||
public Iterator<GATKBin> iterator() {
|
||||
return new BinIterator();
|
||||
}
|
||||
|
||||
private class BinIterator implements Iterator<GATKBin> {
|
||||
/**
|
||||
* Stores the bin # of the Bin currently in use.
|
||||
*/
|
||||
private int nextBin;
|
||||
|
||||
public BinIterator() {
|
||||
nextBin = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are there more bins in this set, waiting to be returned?
|
||||
*
|
||||
* @return True if more bins are remaining.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
while (nextBin <= maxBinNumber) {
|
||||
if (getBin(nextBin) != null) return true;
|
||||
nextBin++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next bin in the provided BinList.
|
||||
*
|
||||
* @return the next available bin in the BinList.
|
||||
*/
|
||||
public GATKBin next() {
|
||||
if (!hasNext())
|
||||
throw new NoSuchElementException("This BinIterator is currently empty");
|
||||
GATKBin result = getBin(nextBin);
|
||||
nextBin++;
|
||||
return result;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Unable to remove from a bin iterator");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.Bin;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Models a bin at which all BAM files in the merged input stream overlap.
|
||||
*/
|
||||
class BAMOverlap {
|
||||
public final int start;
|
||||
public final int stop;
|
||||
|
||||
private final Map<SAMReaderID,Bin> bins = new HashMap<SAMReaderID,Bin>();
|
||||
|
||||
public BAMOverlap(final int start, final int stop) {
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
}
|
||||
|
||||
public void addBin(final SAMReaderID id, final Bin bin) {
|
||||
bins.put(id,bin);
|
||||
}
|
||||
|
||||
public Bin getBin(final SAMReaderID id) {
|
||||
return bins.get(id);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Represents a small section of a BAM file, and every associated interval.
|
||||
*/
|
||||
class FilePointer {
|
||||
protected final Map<SAMReaderID,SAMFileSpan> fileSpans = new HashMap<SAMReaderID,SAMFileSpan>();
|
||||
protected final String referenceSequence;
|
||||
protected final BAMOverlap overlap;
|
||||
protected final List<GenomeLoc> locations;
|
||||
|
||||
/**
|
||||
* Does this file pointer point into an unmapped region?
|
||||
*/
|
||||
protected final boolean isRegionUnmapped;
|
||||
|
||||
public FilePointer(final GenomeLoc location) {
|
||||
this.referenceSequence = location.getContig();
|
||||
this.overlap = null;
|
||||
this.locations = Collections.singletonList(location);
|
||||
this.isRegionUnmapped = GenomeLoc.isUnmapped(location);
|
||||
}
|
||||
|
||||
public FilePointer(final String referenceSequence,final BAMOverlap overlap) {
|
||||
this.referenceSequence = referenceSequence;
|
||||
this.overlap = overlap;
|
||||
this.locations = new ArrayList<GenomeLoc>();
|
||||
this.isRegionUnmapped = false;
|
||||
}
|
||||
|
||||
public void addLocation(GenomeLoc location) {
|
||||
locations.add(location);
|
||||
}
|
||||
|
||||
public void addFileSpans(SAMReaderID id, SAMFileSpan fileSpan) {
|
||||
this.fileSpans.put(id,fileSpan);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
/**
 * Constants used in reading and writing BAM files.
 */
class GATKBAMFileConstants {
    /**
     * The beginning of a BAMRecord is a fixed-size block of 8 int32s.
     */
    static final int FIXED_BLOCK_SIZE = 8 * 4;

    /**
     * Sanity check -- we never expect BAMRecords to be as big as this.
     */
    static final int MAXIMUM_RECORD_LENGTH = 1024 * 1024;

    /**
     * BAM file magic number ("BAM" followed by the byte 0x01).  This is what is present in the
     * gunzipped version of the file, which never exists on disk.
     * Spelled out as byte literals so the value does not depend on the platform default charset.
     */
    static final byte[] BAM_MAGIC = {'B', 'A', 'M', 1};

    /**
     * BAM index file magic number ("BAI" followed by the byte 0x01).
     * Spelled out as byte literals so the value does not depend on the platform default charset.
     */
    static final byte[] BAM_INDEX_MAGIC = {'B', 'A', 'I', 1};
}
|
||||
|
|
@ -0,0 +1,575 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.BAMIndex;
|
||||
import net.sf.samtools.BAMIndexMetaData;
|
||||
import net.sf.samtools.Bin;
|
||||
import net.sf.samtools.BrowseableBAMIndex;
|
||||
import net.sf.samtools.GATKBAMFileSpan;
|
||||
import net.sf.samtools.GATKBin;
|
||||
import net.sf.samtools.GATKBinList;
|
||||
import net.sf.samtools.GATKChunk;
|
||||
import net.sf.samtools.LinearIndex;
|
||||
import net.sf.samtools.SAMException;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A basic interface for querying BAM indices.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||
/**
 * Reports the total amount of genomic data that any bin can index.
 */
|
||||
protected static final int BIN_GENOMIC_SPAN = 512*1024*1024;
|
||||
|
||||
/**
 * What is the starting bin for each level?
 * Entry i is the number of the first bin in level i; the array length is the number of levels.
 */
|
||||
private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
|
||||
|
||||
/**
 * Reports the maximum number of bins that can appear in a BAM file.
 */
|
||||
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||
|
||||
/**
 * Computed as MAX_BINS+1 minus the first bin number of the last level.
 * NOTE(review): presumably an upper bound on linear index entries -- confirm against users of this constant.
 */
public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS+1-LEVEL_STARTS[LEVEL_STARTS.length-1];
|
||||
|
||||
/** The index file handed to the constructor. */
private final File mFile;
|
||||
/** Read-only, little-endian memory mapping of the whole index file; all reads go through its position. */
private final MappedByteBuffer mFileBuffer;
|
||||
|
||||
/** Sequence dictionary handed to the constructor; consulted when sizing per-reference bin arrays. */
private SAMSequenceDictionary mBamDictionary = null;
|
||||
|
||||
/** Reference index most recently returned by getQueryResults, or null if none yet. */
private Integer mLastReferenceRetrieved = null;
|
||||
/** Weakly-keyed cache of loaded index content, keyed by reference index. */
private WeakHashMap<Integer,BAMIndexContent> mQueriesByReference = new WeakHashMap<Integer,BAMIndexContent>();
|
||||
|
||||
/**
 * Memory-maps the given BAM index file (read-only, little-endian) and verifies that it
 * starts with the BAM index magic number.
 *
 * @param file The BAM index file to open.
 * @param dictionary Sequence dictionary; stored and consulted later when sizing per-reference bin arrays.
 * @throws RuntimeIOException if the file cannot be opened, sized, mapped or closed.
 * @throws RuntimeException if the first four bytes are not the BAM index magic number.
 */
public GATKBAMIndex(final File file, final SAMSequenceDictionary dictionary) {
    mFile = file;
    mBamDictionary = dictionary;

    // Map the file, making sure the stream is released even when mapping fails.
    try {
        final FileInputStream fileStream = new FileInputStream(mFile);
        try {
            final FileChannel fileChannel = fileStream.getChannel();
            mFileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size());
            mFileBuffer.order(ByteOrder.LITTLE_ENDIAN);
        }
        finally {
            // Closing the stream also closes its channel; an established mapping
            // does not depend on the channel that created it.
            fileStream.close();
        }
    }
    catch (IOException exc) {
        throw new RuntimeIOException(exc.getMessage(), exc);
    }

    // Verify the magic number.
    seek(0);
    final byte[] buffer = new byte[4];
    readBytes(buffer);
    if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) {
        throw new RuntimeException("Invalid file header in BAM index " + mFile +
                                   ": " + new String(buffer));
    }
}
|
||||
|
||||
/**
 * Get the number of levels employed by this index, i.e. the number of entries in LEVEL_STARTS.
 * @return Number of levels in this index.
 */
|
||||
public static int getNumIndexLevels() {
|
||||
return LEVEL_STARTS.length;
|
||||
}
|
||||
|
||||
/**
 * Gets the first bin in the given level by looking it up in LEVEL_STARTS.
 * No range check is performed here; an out-of-range level fails on the array access.
 * @param levelNumber Level number.  0-based.
 * @return The first bin in this level.
 */
|
||||
public static int getFirstBinInLevel(final int levelNumber) {
|
||||
return LEVEL_STARTS[levelNumber];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the number of bins in the given level.
|
||||
* @param levelNumber Level number. 0-based.
|
||||
* @return The size (number of possible bins) of the given level.
|
||||
*/
|
||||
public int getLevelSize(final int levelNumber) {
|
||||
if(levelNumber == getNumIndexLevels())
|
||||
return MAX_BINS+1-LEVEL_STARTS[levelNumber];
|
||||
else
|
||||
return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the level associated with the given bin number.
|
||||
* @param bin The bin for which to determine the level.
|
||||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
@Override
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
if(gatkBin.getBinNumber() >= MAX_BINS)
|
||||
throw new SAMException("Tried to get level for invalid bin.");
|
||||
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
||||
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
||||
return i;
|
||||
}
|
||||
throw new SAMException("Unable to find correct bin for bin "+bin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getFirstLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBin(bin);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (new GATKBin(bin).getBinNumber() - levelStart)*(BIN_GENOMIC_SPAN /levelSize)+1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the last locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
@Override
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBin(bin);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
||||
}
|
||||
|
||||
/**
 * Reads the reference count stored directly after the 4-byte magic number.
 * Moves the buffer position to byte offset 4 and reads one 32-bit integer from there.
 * @return The integer stored at offset 4 of the index file.
 */
public int getNumberOfReferences() {
|
||||
seek(4);
|
||||
return readInteger();
|
||||
}
|
||||
|
||||
/**
|
||||
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||
* @return The file offset of the first record in the last linear bin, or -1
|
||||
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||
*/
|
||||
public long getStartOfLastLinearBin() {
|
||||
seek(4);
|
||||
|
||||
final int sequenceCount = readInteger();
|
||||
// Because no reads may align to the last sequence in the sequence dictionary,
|
||||
// grab the last element of the linear index for each sequence, and return
|
||||
// the last one from the last sequence that has one.
|
||||
long lastLinearIndexPointer = -1;
|
||||
for (int i = 0; i < sequenceCount; i++) {
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j1 = 0; j1 < nBins; j1++) {
|
||||
// Skip bin #
|
||||
skipBytes(4);
|
||||
final int nChunks = readInteger();
|
||||
// Skip chunks
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
if (nLinearBins > 0) {
|
||||
// Skip to last element of list of linear bins
|
||||
skipBytes(8 * (nLinearBins - 1));
|
||||
lastLinearIndexPointer = readLong();
|
||||
}
|
||||
}
|
||||
|
||||
return lastLinearIndexPointer;
|
||||
}
|
||||
|
||||
/**
 * Metadata retrieval is not supported by this index implementation; this method always throws.
 * @param reference the reference of interest (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
|
||||
public BAMIndexMetaData getMetaData(int reference) {
|
||||
throw new UnsupportedOperationException("Cannot retrieve metadata for GATKBAMIndex");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns count of records unassociated with any reference. Call before the index file is closed
|
||||
*
|
||||
* @return meta data at the end of the bam index that indicates count of records holding no coordinates
|
||||
* or null if no meta data (old index format)
|
||||
*/
|
||||
public Long getNoCoordinateCount() {
|
||||
|
||||
seek(4);
|
||||
final int sequenceCount = readInteger();
|
||||
|
||||
skipToSequence(sequenceCount);
|
||||
try { // in case of old index file without meta data
|
||||
return readLong();
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get list of regions of BAM file that may contain SAMRecords for the given range
|
||||
* @param referenceIndex sequence of desired SAMRecords
|
||||
* @param startPos 1-based start of the desired interval, inclusive
|
||||
* @param endPos 1-based end of the desired interval, inclusive
|
||||
* @return the virtual file position. Each pair is the first and last virtual file position
|
||||
* in a range that can be scanned to find SAMRecords that overlap the given positions.
|
||||
*/
|
||||
@Override
|
||||
public GATKBAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) {
|
||||
BAMIndexContent queryResults = getQueryResults(referenceIndex);
|
||||
|
||||
if(queryResults == null)
|
||||
return null;
|
||||
|
||||
GATKBinList overlappingBins = getBinsOverlapping(referenceIndex,startPos,endPos);
|
||||
|
||||
// System.out.println("# Sequence target TID: " + referenceIndex);
|
||||
List<GATKBin> bins = new ArrayList<GATKBin>();
|
||||
for(GATKBin bin: queryResults.getBins()) {
|
||||
if (overlappingBins.getBins().get(bin.getBinNumber()))
|
||||
bins.add(bin);
|
||||
}
|
||||
|
||||
if (bins.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
||||
for(GATKBin bin: bins) {
|
||||
for(GATKChunk chunk: bin.getGATKChunkList())
|
||||
chunkList.add(chunk.clone());
|
||||
}
|
||||
|
||||
if (chunkList.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
|
||||
return new GATKBAMFileSpan(chunkList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an overlapping query of all bins bounding the given location.
|
||||
* @param bin The bin over which to perform an overlapping query.
|
||||
* @return The file pointers
|
||||
*/
|
||||
@Override
|
||||
public GATKBAMFileSpan getSpanOverlapping(final Bin bin) {
|
||||
if(bin == null)
|
||||
return null;
|
||||
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
|
||||
final int referenceSequence = gatkBin.getReferenceSequence();
|
||||
BAMIndexContent indexQuery = getQueryResults(referenceSequence);
|
||||
|
||||
if(indexQuery == null)
|
||||
return null;
|
||||
|
||||
final int binLevel = getLevelForBin(bin);
|
||||
final int firstLocusInBin = getFirstLocusInBin(bin);
|
||||
|
||||
// Add the specified bin to the tree if it exists.
|
||||
List<GATKBin> binTree = new ArrayList<GATKBin>();
|
||||
if(indexQuery.containsBin(gatkBin))
|
||||
binTree.add(indexQuery.getBins().getBin(gatkBin.getBinNumber()));
|
||||
|
||||
int currentBinLevel = binLevel;
|
||||
while(--currentBinLevel >= 0) {
|
||||
final int binStart = getFirstBinInLevel(currentBinLevel);
|
||||
final int binWidth = getMaxAddressibleGenomicLocation()/getLevelSize(currentBinLevel);
|
||||
final int binNumber = firstLocusInBin/binWidth + binStart;
|
||||
GATKBin parentBin = indexQuery.getBins().getBin(binNumber);
|
||||
if(parentBin != null && indexQuery.containsBin(parentBin))
|
||||
binTree.add(parentBin);
|
||||
}
|
||||
|
||||
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
||||
for(GATKBin coveringBin: binTree) {
|
||||
for(GATKChunk chunk: coveringBin.getGATKChunkList())
|
||||
chunkList.add(chunk.clone());
|
||||
}
|
||||
|
||||
final int start = getFirstLocusInBin(bin);
|
||||
chunkList = optimizeChunkList(chunkList,indexQuery.getLinearIndex().getMinimumOffset(start));
|
||||
return new GATKBAMFileSpan(chunkList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a list of bins in the BAM file that may contain SAMRecords for the given range.
|
||||
* @param referenceIndex sequence of desired SAMRecords
|
||||
* @param startPos 1-based start of the desired interval, inclusive
|
||||
* @param endPos 1-based end of the desired interval, inclusive
|
||||
* @return a list of bins that contain relevant data.
|
||||
*/
|
||||
public GATKBinList getBinsOverlapping(final int referenceIndex, final int startPos, final int endPos) {
|
||||
final BitSet regionBins = regionToBins(startPos,endPos);
|
||||
if (regionBins == null) {
|
||||
return null;
|
||||
}
|
||||
return new GATKBinList(referenceIndex,regionBins);
|
||||
}
|
||||
|
||||
protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) {
|
||||
seek(4);
|
||||
|
||||
List<GATKChunk> metaDataChunks = new ArrayList<GATKChunk>();
|
||||
|
||||
final int sequenceCount = readInteger();
|
||||
|
||||
if (referenceSequence >= sequenceCount) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final BitSet regionBins = regionToBins(startPos, endPos);
|
||||
if (regionBins == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
skipToSequence(referenceSequence);
|
||||
|
||||
int binCount = readInteger();
|
||||
boolean metaDataSeen = false;
|
||||
GATKBin[] bins = new GATKBin[getMaxBinNumberForReference(referenceSequence) +1];
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
final int indexBin = readInteger();
|
||||
final int nChunks = readInteger();
|
||||
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
|
||||
// System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks);
|
||||
GATKChunk lastChunk = null;
|
||||
if (regionBins.get(indexBin)) {
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = readLong();
|
||||
final long chunkEnd = readLong();
|
||||
lastChunk = new GATKChunk(chunkBegin, chunkEnd);
|
||||
chunks.add(lastChunk);
|
||||
}
|
||||
} else if (indexBin == MAX_BINS) {
|
||||
// meta data - build the bin so that the count of bins is correct;
|
||||
// but don't attach meta chunks to the bin, or normal queries will be off
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = readLong();
|
||||
final long chunkEnd = readLong();
|
||||
lastChunk = new GATKChunk(chunkBegin, chunkEnd);
|
||||
metaDataChunks.add(lastChunk);
|
||||
}
|
||||
metaDataSeen = true;
|
||||
continue; // don't create a Bin
|
||||
} else {
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||
bin.setGATKChunkList(chunks);
|
||||
bin.setLastChunk(lastChunk);
|
||||
bins[indexBin] = bin;
|
||||
}
|
||||
|
||||
final int nLinearBins = readInteger();
|
||||
|
||||
final int regionLinearBinStart = LinearIndex.convertToLinearIndexOffset(startPos);
|
||||
final int regionLinearBinStop = endPos > 0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins-1;
|
||||
final int actualStop = Math.min(regionLinearBinStop, nLinearBins -1);
|
||||
|
||||
long[] linearIndexEntries = new long[0];
|
||||
if (regionLinearBinStart < nLinearBins) {
|
||||
linearIndexEntries = new long[actualStop-regionLinearBinStart+1];
|
||||
skipBytes(8 * regionLinearBinStart);
|
||||
for(int linearBin = regionLinearBinStart; linearBin <= actualStop; linearBin++)
|
||||
linearIndexEntries[linearBin-regionLinearBinStart] = readLong();
|
||||
}
|
||||
|
||||
final LinearIndex linearIndex = new LinearIndex(referenceSequence,regionLinearBinStart,linearIndexEntries);
|
||||
|
||||
return new BAMIndexContent(referenceSequence, bins, binCount - (metaDataSeen? 1 : 0), linearIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* The maxiumum bin number for a reference sequence of a given length
|
||||
*/
|
||||
static int getMaxBinNumberForSequenceLength(int sequenceLength) {
|
||||
return getFirstBinInLevel(getNumIndexLevels() - 1) + (sequenceLength >> 14);
|
||||
// return 4680 + (sequenceLength >> 14); // note 4680 = getFirstBinInLevel(getNumIndexLevels() - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks up the cached BAM query results if they're still in the cache and not expired. Otherwise,
|
||||
* retrieves the cache results from disk.
|
||||
* @param referenceIndex The reference to load. CachingBAMFileIndex only stores index data for entire references.
|
||||
* @return The index information for this reference.
|
||||
*/
|
||||
protected BAMIndexContent getQueryResults(final int referenceIndex) {
|
||||
// WeakHashMap is a bit weird in that its lookups are done via equals() equality, but expirations must be
|
||||
// handled via == equality. This implementation jumps through a few hoops to make sure that == equality still
|
||||
// holds even in the context of boxing/unboxing.
|
||||
|
||||
// If this query is for the same reference index as the last query, return it.
|
||||
if(mLastReferenceRetrieved!=null && mLastReferenceRetrieved == referenceIndex)
|
||||
return mQueriesByReference.get(referenceIndex);
|
||||
|
||||
// If not, check to see whether it's available in the cache.
|
||||
BAMIndexContent queryResults = mQueriesByReference.get(referenceIndex);
|
||||
if(queryResults != null) {
|
||||
mLastReferenceRetrieved = referenceIndex;
|
||||
mQueriesByReference.put(referenceIndex,queryResults);
|
||||
return queryResults;
|
||||
}
|
||||
|
||||
// If not in the cache, attempt to load it from disk.
|
||||
queryResults = query(referenceIndex,1,-1);
|
||||
if(queryResults != null) {
|
||||
mLastReferenceRetrieved = referenceIndex;
|
||||
mQueriesByReference.put(referenceIndex,queryResults);
|
||||
return queryResults;
|
||||
}
|
||||
|
||||
// Not even available on disk.
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Gets the genomic span used when computing bin widths for this indexing scheme.
 * @return The value of BIN_GENOMIC_SPAN.
 */
|
||||
protected int getMaxAddressibleGenomicLocation() {
|
||||
return BIN_GENOMIC_SPAN;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get candidate bins for the specified region
|
||||
* @param startPos 1-based start of target region, inclusive.
|
||||
* @param endPos 1-based end of target region, inclusive.
|
||||
* @return bit set for each bin that may contain SAMRecords in the target region.
|
||||
*/
|
||||
protected BitSet regionToBins(final int startPos, final int endPos) {
|
||||
final int maxPos = 0x1FFFFFFF;
|
||||
final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos;
|
||||
final int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos;
|
||||
if (start > end) {
|
||||
return null;
|
||||
}
|
||||
int k;
|
||||
final BitSet bitSet = new BitSet(MAX_BINS);
|
||||
bitSet.set(0);
|
||||
for (k = 1 + (start>>26); k <= 1 + (end>>26); ++k) bitSet.set(k);
|
||||
for (k = 9 + (start>>23); k <= 9 + (end>>23); ++k) bitSet.set(k);
|
||||
for (k = 73 + (start>>20); k <= 73 + (end>>20); ++k) bitSet.set(k);
|
||||
for (k = 585 + (start>>17); k <= 585 + (end>>17); ++k) bitSet.set(k);
|
||||
for (k = 4681 + (start>>14); k <= 4681 + (end>>14); ++k) bitSet.set(k);
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
protected List<GATKChunk> optimizeChunkList(final List<GATKChunk> chunks, final long minimumOffset) {
|
||||
GATKChunk lastChunk = null;
|
||||
Collections.sort(chunks);
|
||||
final List<GATKChunk> result = new ArrayList<GATKChunk>();
|
||||
for (final GATKChunk chunk : chunks) {
|
||||
if (chunk.getChunkEnd() <= minimumOffset) {
|
||||
continue; // linear index optimization
|
||||
}
|
||||
if (result.isEmpty()) {
|
||||
result.add(chunk);
|
||||
lastChunk = chunk;
|
||||
continue;
|
||||
}
|
||||
// Coalesce chunks that are in adjacent file blocks.
|
||||
// This is a performance optimization.
|
||||
if (!lastChunk.overlaps(chunk) && !lastChunk.isAdjacentTo(chunk)) {
|
||||
result.add(chunk);
|
||||
lastChunk = chunk;
|
||||
} else {
|
||||
if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) {
|
||||
lastChunk.setChunkEnd(chunk.getChunkEnd());
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum possible bin number for this reference sequence.
|
||||
* This is based on the maximum coordinate position of the reference
|
||||
* which is based on the size of the reference
|
||||
*/
|
||||
private int getMaxBinNumberForReference(final int reference) {
|
||||
try {
|
||||
final int sequenceLength = mBamDictionary.getSequence(reference).getSequenceLength();
|
||||
return getMaxBinNumberForSequenceLength(sequenceLength);
|
||||
} catch (Exception e) {
|
||||
return MAX_BINS;
|
||||
}
|
||||
}
|
||||
|
||||
private void skipToSequence(final int sequenceIndex) {
|
||||
for (int i = 0; i < sequenceIndex; i++) {
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j = 0; j < nBins; j++) {
|
||||
final int bin = readInteger();
|
||||
final int nChunks = readInteger();
|
||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||
skipBytes(8 * nLinearBins);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fills the given array from the mapped buffer at its current position, advancing the position.
 * @param bytes Destination array; filled completely.
 */
private void readBytes(final byte[] bytes) {
|
||||
mFileBuffer.get(bytes);
|
||||
}
|
||||
|
||||
/**
 * Reads the next 32-bit integer from the mapped buffer, advancing the position by four bytes.
 * @return The integer at the current buffer position.
 */
private int readInteger() {
|
||||
return mFileBuffer.getInt();
|
||||
}
|
||||
|
||||
/**
 * Reads the next 64-bit integer from the mapped buffer, advancing the position by eight bytes.
 * @return The long at the current buffer position.
 */
private long readLong() {
|
||||
return mFileBuffer.getLong();
|
||||
}
|
||||
|
||||
/**
 * Moves the mapped buffer's position forward by the given number of bytes without reading them.
 * @param count Number of bytes to step over, relative to the current position.
 */
private void skipBytes(final int count) {
|
||||
mFileBuffer.position(mFileBuffer.position() + count);
|
||||
}
|
||||
|
||||
/**
 * Moves the mapped buffer's position to the given absolute byte offset.
 * @param position Absolute offset from the start of the index file.
 */
private void seek(final int position) {
|
||||
mFileBuffer.position(position);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -12,31 +12,40 @@
|
|||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.AbstractBAMFileIndex;
|
||||
import net.sf.samtools.Bin;
|
||||
import net.sf.samtools.BrowseableBAMIndex;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Queue;
|
||||
|
||||
/**
|
||||
* Shard intervals based on position within the BAM file.
|
||||
*
|
||||
|
|
@ -47,7 +56,7 @@ public class IntervalSharder {
|
|||
private static Logger logger = Logger.getLogger(IntervalSharder.class);
|
||||
|
||||
public static Iterator<FilePointer> shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||
return new FilePointerIterator(dataSource,loci);
|
||||
return new IntervalSharder.FilePointerIterator(dataSource,loci);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,7 +126,7 @@ public class IntervalSharder {
|
|||
for(SAMReaderID id: dataSource.getReaderIDs())
|
||||
filePointer.addFileSpans(id,null);
|
||||
return Collections.singletonList(filePointer);
|
||||
}
|
||||
}
|
||||
|
||||
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
|
||||
List<FilePointer> filePointers = new ArrayList<FilePointer>();
|
||||
|
|
@ -125,7 +134,7 @@ public class IntervalSharder {
|
|||
BAMOverlap lastBAMOverlap = null;
|
||||
|
||||
Map<SAMReaderID,BrowseableBAMIndex> readerToIndexMap = new HashMap<SAMReaderID,BrowseableBAMIndex>();
|
||||
BinMergingIterator binMerger = new BinMergingIterator();
|
||||
IntervalSharder.BinMergingIterator binMerger = new IntervalSharder.BinMergingIterator();
|
||||
for(SAMReaderID id: dataSource.getReaderIDs()) {
|
||||
final SAMSequenceRecord referenceSequence = dataSource.getHeader(id).getSequence(contig);
|
||||
// If this contig can't be found in the reference, skip over it.
|
||||
|
|
@ -153,7 +162,7 @@ public class IntervalSharder {
|
|||
final int locationStop = (int)location.getStop();
|
||||
|
||||
// Advance to first bin.
|
||||
while(binIterator.peek().stop < locationStart)
|
||||
while(binIterator.peek().stop < locationStart)
|
||||
binIterator.next();
|
||||
|
||||
// Add all relevant bins to a list. If the given bin extends beyond the end of the current interval, make
|
||||
|
|
@ -230,7 +239,7 @@ public class IntervalSharder {
|
|||
for(FilePointer filePointer: filePointers)
|
||||
filePointer.addFileSpans(id,index.getSpanOverlapping(filePointer.overlap.getBin(id)));
|
||||
}
|
||||
|
||||
|
||||
return filePointers;
|
||||
}
|
||||
|
||||
|
|
@ -239,7 +248,7 @@ public class IntervalSharder {
|
|||
private Queue<BAMOverlap> pendingOverlaps = new LinkedList<BAMOverlap>();
|
||||
|
||||
public void addReader(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, Iterator<Bin> bins) {
|
||||
binQueue.add(new BinQueueState(id,index,referenceSequence,new LowestLevelBinFilteringIterator(index,bins)));
|
||||
binQueue.add(new BinQueueState(id,index,referenceSequence,new IntervalSharder.LowestLevelBinFilteringIterator(index,bins)));
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
@ -369,89 +378,7 @@ public class IntervalSharder {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a small section of a BAM file, and every associated interval.
|
||||
*/
|
||||
class FilePointer {
|
||||
protected final Map<SAMReaderID,SAMFileSpan> fileSpans = new HashMap<SAMReaderID,SAMFileSpan>();
|
||||
protected final String referenceSequence;
|
||||
protected final BAMOverlap overlap;
|
||||
protected final List<GenomeLoc> locations;
|
||||
|
||||
/**
|
||||
* Does this file pointer point into an unmapped region?
|
||||
*/
|
||||
protected final boolean isRegionUnmapped;
|
||||
|
||||
public FilePointer(final GenomeLoc location) {
|
||||
this.referenceSequence = location.getContig();
|
||||
this.overlap = null;
|
||||
this.locations = Collections.singletonList(location);
|
||||
this.isRegionUnmapped = GenomeLoc.isUnmapped(location);
|
||||
}
|
||||
|
||||
public FilePointer(final String referenceSequence,final BAMOverlap overlap) {
|
||||
this.referenceSequence = referenceSequence;
|
||||
this.overlap = overlap;
|
||||
this.locations = new ArrayList<GenomeLoc>();
|
||||
this.isRegionUnmapped = false;
|
||||
}
|
||||
|
||||
public void addLocation(GenomeLoc location) {
|
||||
locations.add(location);
|
||||
}
|
||||
|
||||
public void addFileSpans(SAMReaderID id, SAMFileSpan fileSpan) {
|
||||
this.fileSpans.put(id,fileSpan);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Models a bin at which all BAM files in the merged input stream overlap.
|
||||
*/
|
||||
class BAMOverlap {
|
||||
public final int start;
|
||||
public final int stop;
|
||||
|
||||
private final Map<SAMReaderID,Bin> bins = new HashMap<SAMReaderID,Bin>();
|
||||
|
||||
public BAMOverlap(final int start, final int stop) {
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
}
|
||||
|
||||
public void addBin(final SAMReaderID id, final Bin bin) {
|
||||
bins.put(id,bin);
|
||||
}
|
||||
|
||||
public Bin getBin(final SAMReaderID id) {
|
||||
return bins.get(id);
|
||||
}
|
||||
}
|
||||
|
||||
class ReaderBin {
|
||||
public final SAMReaderID id;
|
||||
public final BrowseableBAMIndex index;
|
||||
public final int referenceSequence;
|
||||
public final Bin bin;
|
||||
|
||||
public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) {
|
||||
this.id = id;
|
||||
this.index = index;
|
||||
this.referenceSequence = referenceSequence;
|
||||
this.bin = bin;
|
||||
}
|
||||
|
||||
public int getStart() {
|
||||
return index.getFirstLocusInBin(bin);
|
||||
}
|
||||
|
||||
public int getStop() {
|
||||
return index.getLastLocusInBin(bin);
|
||||
}
|
||||
}
|
||||
|
||||
class BinQueueState implements Comparable<BinQueueState> {
|
||||
class BinQueueState implements Comparable<org.broadinstitute.sting.gatk.datasources.reads.BinQueueState> {
|
||||
private final SAMReaderID id;
|
||||
private final BrowseableBAMIndex index;
|
||||
private final int referenceSequence;
|
||||
|
|
@ -494,7 +421,7 @@ class BinQueueState implements Comparable<BinQueueState> {
|
|||
return nextBin;
|
||||
}
|
||||
|
||||
public int compareTo(BinQueueState other) {
|
||||
public int compareTo(org.broadinstitute.sting.gatk.datasources.reads.BinQueueState other) {
|
||||
// Exhausted-iterator guards: two exhausted states tie; an exhausted state
// sorts before one that still has bins, and vice versa.
if(!this.bins.hasNext() && !other.bins.hasNext()) return 0;
if(!this.bins.hasNext()) return -1;
// Must test the OTHER state here. Re-testing this.bins (as before) made this
// branch unreachable and let an exhausted 'other' fall through to the code below.
if(!other.bins.hasNext()) return 1;
|
||||
|
|
@ -522,8 +449,4 @@ class BinQueueState implements Comparable<BinQueueState> {
|
|||
lastLocusInCurrentBin = index.getLastLocusInBin(bin);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,20 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
/**
|
||||
* Handles locus shards of BAM information.
|
||||
|
|
@ -22,7 +15,7 @@ import net.sf.picard.filter.SamRecordFilter;
|
|||
* @version 1.0
|
||||
* @date Apr 7, 2009
|
||||
*/
|
||||
public class LocusShard extends BAMFormatAwareShard {
|
||||
public class LocusShard extends Shard {
|
||||
/**
|
||||
* Create a new locus shard, divided by index.
|
||||
* @param intervals List of intervals to process.
|
||||
|
|
@ -22,17 +22,13 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
|
@ -1,11 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -14,7 +11,7 @@ import java.util.List;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class MonolithicShard extends BAMFormatAwareShard {
|
||||
public class MonolithicShard extends Shard {
|
||||
/**
|
||||
* Creates a new monolithic shard of the given type.
|
||||
* @param shardType Type of the shard. Must be either read or locus; cannot be intervalic.
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -1,18 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
/**
|
||||
|
|
@ -37,7 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class ReadShard extends BAMFormatAwareShard {
|
||||
public class ReadShard extends Shard {
|
||||
/**
|
||||
* The reads making up this shard.
|
||||
*/
|
||||
|
|
@ -22,15 +22,12 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
||||
|
|
@ -159,7 +156,7 @@ public class ReadShardStrategy implements ShardStrategy {
|
|||
}
|
||||
|
||||
if(selectedReaders.size() > 0) {
|
||||
BAMFormatAwareShard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||
Shard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||
dataSource.fillShard(shard);
|
||||
|
||||
if(!shard.isBufferEmpty()) {
|
||||
|
|
@ -175,7 +172,7 @@ public class ReadShardStrategy implements ShardStrategy {
|
|||
else {
|
||||
// todo -- this nulling of intervals is a bit annoying since readwalkers without
|
||||
// todo -- any -L values need to be special cased throughout the code.
|
||||
BAMFormatAwareShard shard = new ReadShard(parser,dataSource,position,null,false);
|
||||
Shard shard = new ReadShard(parser,dataSource,position,null,false);
|
||||
dataSource.fillShard(shard);
|
||||
nextShard = !shard.isBufferEmpty() ? shard : null;
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.Bin;
|
||||
import net.sf.samtools.BrowseableBAMIndex;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 2, 2011
|
||||
* Time: 4:36:40 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class ReaderBin {
|
||||
public final SAMReaderID id;
|
||||
public final BrowseableBAMIndex index;
|
||||
public final int referenceSequence;
|
||||
public final Bin bin;
|
||||
|
||||
public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) {
|
||||
this.id = id;
|
||||
this.index = index;
|
||||
this.referenceSequence = referenceSequence;
|
||||
this.bin = bin;
|
||||
}
|
||||
|
||||
public int getStart() {
|
||||
return index.getFirstLocusInBin(bin);
|
||||
}
|
||||
|
||||
public int getStop() {
|
||||
return index.getLastLocusInBin(bin);
|
||||
}
|
||||
}
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
|
@ -33,10 +33,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
|
|
@ -59,7 +55,7 @@ import java.util.*;
|
|||
* <p/>
|
||||
* Converts shards to SAM iterators over the specified region
|
||||
*/
|
||||
public class SAMDataSource implements SimpleDataSource {
|
||||
public class SAMDataSource {
|
||||
/** Backing support for reads. */
|
||||
protected final ReadProperties readProperties;
|
||||
|
||||
|
|
@ -423,7 +419,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @param shard Shard to fill.
|
||||
* @return true if at the end of the stream. False otherwise.
|
||||
*/
|
||||
public void fillShard(BAMFormatAwareShard shard) {
|
||||
public void fillShard(Shard shard) {
|
||||
if(!shard.buffersReads())
|
||||
throw new ReviewedStingException("Attempting to fill a non-buffering shard.");
|
||||
|
||||
|
|
@ -456,16 +452,12 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
if(shard instanceof MonolithicShard)
|
||||
return seekMonolithic(shard);
|
||||
|
||||
if(!(shard instanceof BAMFormatAwareShard))
|
||||
throw new ReviewedStingException("BlockDrivenSAMDataSource cannot operate on shards of type: " + shard.getClass());
|
||||
BAMFormatAwareShard bamAwareShard = (BAMFormatAwareShard)shard;
|
||||
|
||||
if(bamAwareShard.buffersReads()) {
|
||||
return bamAwareShard.iterator();
|
||||
if(shard.buffersReads()) {
|
||||
return shard.iterator();
|
||||
}
|
||||
else {
|
||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
return getIterator(readers,bamAwareShard,shard instanceof ReadShard);
|
||||
return getIterator(readers,shard,shard instanceof ReadShard);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -500,7 +492,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* TODO: Collapse this flag when the two sharding systems are merged.
|
||||
* @return An iterator over the selected data.
|
||||
*/
|
||||
private StingSAMIterator getIterator(SAMReaders readers, BAMFormatAwareShard shard, boolean enableVerification) {
|
||||
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
|
||||
|
||||
// Set up merging to dynamically merge together multiple BAMs.
|
||||
|
|
@ -566,7 +558,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @param id The id of the given reader.
|
||||
* @param read The read to add to the shard.
|
||||
*/
|
||||
private void addReadToBufferingShard(BAMFormatAwareShard shard,SAMReaderID id,SAMRecord read) {
|
||||
private void addReadToBufferingShard(Shard shard,SAMReaderID id,SAMRecord read) {
|
||||
SAMFileSpan endChunk = read.getFileSource().getFilePointer().getContentsFollowing();
|
||||
shard.addRead(read);
|
||||
readerPositions.put(id,endChunk);
|
||||
|
|
@ -1,10 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Uniquely identifies a SAM file reader.
|
||||
|
|
@ -0,0 +1,215 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 10, 2009
|
||||
* Time: 5:00:27 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 10, 2009
|
||||
* <p/>
|
||||
* Interface Shard
|
||||
* <p/>
|
||||
* The base abstract class for shards.
|
||||
*/
|
||||
public abstract class Shard implements HasGenomeLocation {
|
||||
public enum ShardType {
|
||||
READ, LOCUS
|
||||
}
|
||||
|
||||
protected final GenomeLocParser parser; // incredibly annoying!
|
||||
|
||||
/**
|
||||
* What type of shard is this? Read or locus?
|
||||
*/
|
||||
protected final ShardType shardType;
|
||||
|
||||
/**
|
||||
* Locations.
|
||||
*/
|
||||
protected final List<GenomeLoc> locs;
|
||||
|
||||
/**
|
||||
* Whether the current location is unmapped.
|
||||
*/
|
||||
private final boolean isUnmapped;
|
||||
|
||||
/**
|
||||
* Reads data, if applicable.
|
||||
*/
|
||||
private final SAMDataSource readsDataSource;
|
||||
|
||||
/**
|
||||
* The data backing the next chunks to deliver to the traversal engine.
|
||||
*/
|
||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Statistics about which reads in this shards were used and which were filtered away.
|
||||
*/
|
||||
protected final ReadMetrics readMetrics = new ReadMetrics();
|
||||
|
||||
/**
|
||||
* Whether this shard points to an unmapped region.
|
||||
* Some shard types conceptually be unmapped (e.g. LocusShards). In
|
||||
* this case, isUnmapped should always return false.
|
||||
* @return True if this shard is unmapped. False otherwise.
|
||||
*/
|
||||
public boolean isUnmapped() {
|
||||
return isUnmapped;
|
||||
}
|
||||
|
||||
public Shard(GenomeLocParser parser,
|
||||
ShardType shardType,
|
||||
List<GenomeLoc> locs,
|
||||
SAMDataSource readsDataSource,
|
||||
Map<SAMReaderID,SAMFileSpan> fileSpans,
|
||||
boolean isUnmapped) {
|
||||
this.locs = locs;
|
||||
this.parser = parser;
|
||||
this.shardType = shardType;
|
||||
this.readsDataSource = readsDataSource;
|
||||
this.fileSpans = fileSpans;
|
||||
this.isUnmapped = isUnmapped;
|
||||
}
|
||||
|
||||
/**
|
||||
* If isUnmapped is true, than getGenomeLocs by
|
||||
* definition will return a singleton list with a GenomeLoc.UNMAPPED
|
||||
*
|
||||
* Can return null, indicating that the entire genome is covered.
|
||||
*
|
||||
* @return the genome location represented by this shard
|
||||
*/
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return locs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of chunks delimiting this shard.
|
||||
* @return a list of chunks that contain data for this shard.
|
||||
*/
|
||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
||||
return Collections.unmodifiableMap(fileSpans);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the span of the genomeLocs comprising this shard
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
if ( getGenomeLocs() == null )
|
||||
return GenomeLoc.WHOLE_GENOME;
|
||||
|
||||
int start = Integer.MAX_VALUE;
|
||||
int stop = Integer.MIN_VALUE;
|
||||
String contig = null;
|
||||
|
||||
for ( GenomeLoc loc : getGenomeLocs() ) {
|
||||
if ( GenomeLoc.isUnmapped(loc) )
|
||||
// special case the unmapped region marker, just abort out
|
||||
return loc;
|
||||
contig = loc.getContig();
|
||||
if ( loc.getStart() < start ) start = loc.getStart();
|
||||
if ( loc.getStop() > stop ) stop = loc.getStop();
|
||||
}
|
||||
|
||||
return parser.createGenomeLoc(contig, start, stop);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* what kind of shard do we return
|
||||
* @return ShardType, indicating the type
|
||||
*/
|
||||
public ShardType getShardType() {
|
||||
return shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does any releasing / aggregation required when the shard is through being processed.
|
||||
*/
|
||||
public void close() {
|
||||
readsDataSource.incorporateReadMetrics(readMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets key read validation and filtering properties.
|
||||
* @return set of read properties associated with this shard.
|
||||
*/
|
||||
public ReadProperties getReadProperties() {
|
||||
return readsDataSource.getReadsInfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the runtime metrics associated with this shard.
|
||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
||||
* @return Storage space for metrics.
|
||||
*/
|
||||
public ReadMetrics getReadMetrics() {
|
||||
return readMetrics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this shard is meant to buffer reads, rather
|
||||
* than just holding pointers to their locations.
|
||||
* @return True if this shard can buffer reads. False otherwise.
|
||||
*/
|
||||
public boolean buffersReads() { return false; }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Adds a read to the read buffer.
|
||||
* @param read Add a read to the internal shard buffer.
|
||||
*/
|
||||
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Gets the iterator over the elements cached in the shard.
|
||||
* @return
|
||||
*/
|
||||
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
|
|
@ -1,11 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Efficiently queries BAM read storage files by genomic location.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.utilities;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
||||
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.utilities;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
||||
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Standalone utilities for working with BAM files.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
||||
|
|
@ -23,14 +23,13 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
||||
import net.sf.picard.sam.CreateSequenceDictionary;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.reference.FastaSequenceIndex;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
||||
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||
|
||||
public interface ReferenceDataSourceProgressListener {
|
||||
public void percentProgress(int percent);
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Efficiently queries FASTA reference sequence files by genomic location.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
||||
/**
 * Marker interface that represents an arbitrary consecutive segment within a data stream.
 * It declares no members of its own.
 */
interface DataStreamSegment {
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
||||
/**
|
||||
* Models the entire stream of data.
|
||||
*/
|
||||
class EntireStream implements DataStreamSegment {
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
|
||||
/**
|
||||
* Models a mapped position within a stream of GATK input data.
|
||||
*/
|
||||
class MappedStreamSegment implements DataStreamSegment, HasGenomeLocation {
|
||||
public final GenomeLoc locus;
|
||||
|
||||
/**
|
||||
* Retrieves the first location covered by a mapped stream segment.
|
||||
* @return Location of the first base in this segment.
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return locus;
|
||||
}
|
||||
|
||||
public MappedStreamSegment(GenomeLoc locus) {
|
||||
this.locus = locus;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A pool of reference-ordered data iterators.
|
||||
*/
|
||||
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
||||
// the reference-ordered data itself.
|
||||
private final RMDTriplet fileDescriptor;
|
||||
|
||||
// our tribble track builder
|
||||
private final RMDTrackBuilder builder;
|
||||
|
||||
/**
|
||||
* The header from this RMD, if present.
|
||||
*/
|
||||
private final Object header;
|
||||
|
||||
/**
|
||||
* The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's.
|
||||
*/
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
|
||||
boolean flashbackData = false;
|
||||
public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser,boolean flashbackData) {
|
||||
super(sequenceDictionary,genomeLocParser);
|
||||
this.fileDescriptor = fileDescriptor;
|
||||
this.builder = builder;
|
||||
this.flashbackData = flashbackData;
|
||||
|
||||
// prepopulate one RMDTrack
|
||||
LocationAwareSeekableRODIterator iterator = createNewResource();
|
||||
this.addNewResource(iterator);
|
||||
|
||||
// Pull the proper header and sequence dictionary from the prepopulated track.
|
||||
this.header = iterator.getHeader();
|
||||
this.sequenceDictionary = iterator.getSequenceDictionary();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header used by this resource pool.
|
||||
* @return Header used by this resource pool.
|
||||
*/
|
||||
public Object getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary built into the ROD index file.
|
||||
* @return Sequence dictionary from the index file.
|
||||
*/
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return sequenceDictionary;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new iterator from the existing reference-ordered data. This new iterator is expected
|
||||
* to be completely independent of any other iterator.
|
||||
* @return The newly created resource.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator createNewResource() {
|
||||
if(numIterators() > 0)
|
||||
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
|
||||
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the best existing ROD iterator from the pool. In this case, the best existing ROD is defined as
|
||||
* the first one encountered that is at or before the given position.
|
||||
* @param segment @{inheritedDoc}
|
||||
* @param resources @{inheritedDoc}
|
||||
* @return @{inheritedDoc}
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
|
||||
if(segment instanceof MappedStreamSegment) {
|
||||
GenomeLoc position = ((MappedStreamSegment)segment).getLocation();
|
||||
|
||||
for( LocationAwareSeekableRODIterator RODIterator : resources ) {
|
||||
|
||||
if( (RODIterator.position() == null && RODIterator.hasNext()) ||
|
||||
(RODIterator.position() != null && RODIterator.position().isBefore(position)) )
|
||||
return RODIterator;
|
||||
if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) {
|
||||
((FlashBackIterator)RODIterator).flashBackTo(position);
|
||||
return RODIterator;
|
||||
}
|
||||
|
||||
}
|
||||
return null;
|
||||
}
|
||||
else if(segment instanceof EntireStream) {
|
||||
// Asking for a segment over the entire stream, so by definition, there is no best existing resource.
|
||||
// Force the system to create a new one.
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
throw new ReviewedStingException("Unable to find a ROD iterator for segments of type " + segment.getClass());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* In this case, the iterator is the resource. Pass it through.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) {
|
||||
return resource;
|
||||
}
|
||||
|
||||
/**
|
||||
* kill the buffers in the iterator
|
||||
*/
|
||||
public void closeResource( LocationAwareSeekableRODIterator resource ) {
|
||||
if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,42 +1,49 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 10:04:12 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A data source which provides a single type of reference-ordered data.
|
||||
*/
|
||||
public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||
public class ReferenceOrderedDataSource {
|
||||
/**
|
||||
* The reference-ordered data itself.
|
||||
*/
|
||||
|
|
@ -102,7 +109,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
}
|
||||
|
||||
public Class getType() {
|
||||
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
}
|
||||
|
||||
public Class getRecordType() {
|
||||
|
|
@ -114,7 +121,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
}
|
||||
|
||||
public Object getHeader() {
|
||||
return header;
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -170,120 +177,6 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* A pool of reference-ordered data iterators.
|
||||
*/
|
||||
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
||||
// the reference-ordered data itself.
|
||||
private final RMDTriplet fileDescriptor;
|
||||
|
||||
// our tribble track builder
|
||||
private final RMDTrackBuilder builder;
|
||||
|
||||
/**
|
||||
* The header from this RMD, if present.
|
||||
*/
|
||||
private final Object header;
|
||||
|
||||
/**
|
||||
* The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's.
|
||||
*/
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
|
||||
boolean flashbackData = false;
|
||||
public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) {
|
||||
super(sequenceDictionary,genomeLocParser);
|
||||
this.fileDescriptor = fileDescriptor;
|
||||
this.builder = builder;
|
||||
this.flashbackData = flashbackData;
|
||||
|
||||
// prepopulate one RMDTrack
|
||||
LocationAwareSeekableRODIterator iterator = createNewResource();
|
||||
this.addNewResource(iterator);
|
||||
|
||||
// Pull the proper header and sequence dictionary from the prepopulated track.
|
||||
this.header = iterator.getHeader();
|
||||
this.sequenceDictionary = iterator.getSequenceDictionary();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header used by this resource pool.
|
||||
* @return Header used by this resource pool.
|
||||
*/
|
||||
public Object getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary built into the ROD index file.
|
||||
* @return Sequence dictionary from the index file.
|
||||
*/
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return sequenceDictionary;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new iterator from the existing reference-ordered data. This new iterator is expected
|
||||
* to be completely independent of any other iterator.
|
||||
* @return The newly created resource.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator createNewResource() {
|
||||
if(numIterators() > 0)
|
||||
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
|
||||
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the best existing ROD iterator from the pool. In this case, the best existing ROD is defined as
|
||||
* the first one encountered that is at or before the given position.
|
||||
* @param segment @{inheritedDoc}
|
||||
* @param resources @{inheritedDoc}
|
||||
* @return @{inheritedDoc}
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
|
||||
if(segment instanceof MappedStreamSegment) {
|
||||
GenomeLoc position = ((MappedStreamSegment)segment).getLocation();
|
||||
|
||||
for( LocationAwareSeekableRODIterator RODIterator : resources ) {
|
||||
|
||||
if( (RODIterator.position() == null && RODIterator.hasNext()) ||
|
||||
(RODIterator.position() != null && RODIterator.position().isBefore(position)) )
|
||||
return RODIterator;
|
||||
if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) {
|
||||
((FlashBackIterator)RODIterator).flashBackTo(position);
|
||||
return RODIterator;
|
||||
}
|
||||
|
||||
}
|
||||
return null;
|
||||
}
|
||||
else if(segment instanceof EntireStream) {
|
||||
// Asking for a segment over the entire stream, so by definition, there is no best existing resource.
|
||||
// Force the system to create a new one.
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
throw new ReviewedStingException("Unable to find a ROD iterator for segments of type " + segment.getClass());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* In this case, the iterator is the resource. Pass it through.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) {
|
||||
return resource;
|
||||
}
|
||||
|
||||
/**
|
||||
* kill the buffers in the iterator
|
||||
*/
|
||||
public void closeResource( LocationAwareSeekableRODIterator resource ) {
|
||||
if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* a data pool for the new query based RODs
|
||||
*/
|
||||
|
|
@ -356,6 +249,4 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
|
|||
protected void closeResource(RMDTrack track) {
|
||||
track.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,28 +1,38 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 10:55:26 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A pool of open resources, all of which can create a closeable iterator.
|
||||
|
|
@ -37,7 +47,7 @@ abstract class ResourcePool <T,I extends Iterator> {
|
|||
* Builder/parser for GenomeLocs.
|
||||
*/
|
||||
protected final GenomeLocParser genomeLocParser;
|
||||
|
||||
|
||||
/**
|
||||
* All iterators of this reference-ordered data.
|
||||
*/
|
||||
|
|
@ -89,7 +99,7 @@ abstract class ResourcePool <T,I extends Iterator> {
|
|||
// Make a note of this assignment for proper releasing later.
|
||||
resourceAssignments.put(iterator, selectedResource);
|
||||
}
|
||||
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
|
|
@ -133,7 +143,7 @@ abstract class ResourcePool <T,I extends Iterator> {
|
|||
protected abstract T createNewResource();
|
||||
|
||||
/**
|
||||
* Find the most appropriate resource to acquire the specified data.
|
||||
* Find the most appropriate resource to acquire the specified data.
|
||||
* @param segment The data over which the resource is required.
|
||||
* @param availableResources A list of candidate resources to evaluate.
|
||||
* @return The best choice of the availableResources, or null if no resource meets the criteria.
|
||||
|
|
@ -175,33 +185,3 @@ abstract class ResourcePool <T,I extends Iterator> {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Marker interface for an arbitrary consecutive segment within a data stream.
 * It declares no members of its own.
 */
interface DataStreamSegment {}
|
||||
|
||||
/**
|
||||
* Models the entire stream of data.
|
||||
*/
|
||||
class EntireStream implements DataStreamSegment {
|
||||
}
|
||||
|
||||
/**
|
||||
* Models a mapped position within a stream of GATK input data.
|
||||
*/
|
||||
class MappedStreamSegment implements DataStreamSegment, HasGenomeLocation {
|
||||
public final GenomeLoc locus;
|
||||
|
||||
/**
|
||||
* Retrieves the first location covered by a mapped stream segment.
|
||||
* @return Location of the first base in this segment.
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return locus;
|
||||
}
|
||||
|
||||
public MappedStreamSegment(GenomeLoc locus) {
|
||||
this.locus = locus;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Efficiently queries arbitrary files sorted according to reference coordinate order.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
|
@ -1,117 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
/**
|
||||
* A common interface for shards that natively understand the BAM format.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public abstract class BAMFormatAwareShard extends Shard {
|
||||
/**
|
||||
* Whether the current location is unmapped.
|
||||
*/
|
||||
private final boolean isUnmapped;
|
||||
|
||||
/**
|
||||
* Reads data, if applicable.
|
||||
*/
|
||||
private final SAMDataSource readsDataSource;
|
||||
|
||||
/**
|
||||
* The data backing the next chunks to deliver to the traversal engine.
|
||||
*/
|
||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
||||
|
||||
public BAMFormatAwareShard(GenomeLocParser parser,
|
||||
ShardType shardType,
|
||||
List<GenomeLoc> locs,
|
||||
SAMDataSource readsDataSource,
|
||||
Map<SAMReaderID,SAMFileSpan> fileSpans,
|
||||
boolean isUnmapped) {
|
||||
super(parser, shardType, locs);
|
||||
this.readsDataSource = readsDataSource;
|
||||
this.fileSpans = fileSpans;
|
||||
this.isUnmapped = isUnmapped;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the shard, tallying and incorporating read data.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
readsDataSource.incorporateReadMetrics(readMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of chunks delimiting this shard.
|
||||
* @return a list of chunks that contain data for this shard.
|
||||
*/
|
||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
||||
return Collections.unmodifiableMap(fileSpans);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets key read validation and filtering properties.
|
||||
* @return set of read properties associated with this shard.
|
||||
*/
|
||||
@Override
|
||||
public ReadProperties getReadProperties() {
|
||||
return readsDataSource.getReadsInfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this shard is meant to buffer reads, rather
|
||||
* than just holding pointers to their locations.
|
||||
* @return True if this shard can buffer reads. False otherwise.
|
||||
*/
|
||||
public boolean buffersReads() { return false; }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Adds a read to the read buffer.
|
||||
* @param read Add a read to the internal shard buffer.
|
||||
*/
|
||||
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Gets the iterator over the elements cached in the shard.
|
||||
* @return
|
||||
*/
|
||||
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
|
||||
/**
|
||||
* Whether this shard points to an unmapped region.
|
||||
* Some shard types conceptually be unmapped (e.g. LocusShards). In
|
||||
* this case, isUnmapped should always return false.
|
||||
* @return True if this shard is unmapped. False otherwise.
|
||||
*/
|
||||
public boolean isUnmapped() {
|
||||
return isUnmapped;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,132 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 10, 2009
|
||||
* Time: 5:00:27 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 10, 2009
|
||||
* <p/>
|
||||
* Interface Shard
|
||||
* <p/>
|
||||
* The base abstract class for shards.
|
||||
*/
|
||||
public abstract class Shard implements HasGenomeLocation {
|
||||
public enum ShardType {
|
||||
READ, LOCUS
|
||||
}
|
||||
|
||||
protected final GenomeLocParser parser; // incredibly annoying!
|
||||
|
||||
/**
|
||||
* What type of MonolithicShard is this? Read or locus?
|
||||
*/
|
||||
protected final ShardType shardType;
|
||||
|
||||
/**
|
||||
* Locations. For the monolithic shard, should be a list of all available contigs in the reference.
|
||||
*/
|
||||
protected final List<GenomeLoc> locs;
|
||||
|
||||
/**
|
||||
* Statistics about which reads in this shards were used and which were filtered away.
|
||||
*/
|
||||
protected final ReadMetrics readMetrics = new ReadMetrics();
|
||||
|
||||
public Shard(GenomeLocParser parser, ShardType shardType, List<GenomeLoc> locs) {
|
||||
this.locs = locs;
|
||||
this.parser = parser;
|
||||
this.shardType = shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
* If isUnmapped is true, than getGenomeLocs by
|
||||
* definition will return a singleton list with a GenomeLoc.UNMAPPED
|
||||
*
|
||||
* Can return null, indicating that the entire genome is covered.
|
||||
*
|
||||
* @return the genome location represented by this shard
|
||||
*/
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return locs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the span of the genomeLocs comprising this shard
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
if ( getGenomeLocs() == null )
|
||||
return GenomeLoc.WHOLE_GENOME;
|
||||
|
||||
int start = Integer.MAX_VALUE;
|
||||
int stop = Integer.MIN_VALUE;
|
||||
String contig = null;
|
||||
|
||||
for ( GenomeLoc loc : getGenomeLocs() ) {
|
||||
if ( GenomeLoc.isUnmapped(loc) )
|
||||
// special case the unmapped region marker, just abort out
|
||||
return loc;
|
||||
contig = loc.getContig();
|
||||
if ( loc.getStart() < start ) start = loc.getStart();
|
||||
if ( loc.getStop() > stop ) stop = loc.getStop();
|
||||
}
|
||||
|
||||
return parser.createGenomeLoc(contig, start, stop);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* what kind of shard do we return
|
||||
* @return ShardType, indicating the type
|
||||
*/
|
||||
public ShardType getShardType() {
|
||||
return shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does any releasing / aggregation required when the shard is through being processed.
|
||||
*/
|
||||
public void close() {
|
||||
; // by default don't do anything
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets required configuration for validating and filtering reads.
|
||||
* @return read configuration properties.
|
||||
*/
|
||||
public abstract ReadProperties getReadProperties();
|
||||
|
||||
/**
|
||||
* Gets the runtime metrics associated with this shard.
|
||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
||||
* @return Storage space for metrics.
|
||||
*/
|
||||
public ReadMetrics getReadMetrics() {
|
||||
return readMetrics;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
/**
|
||||
* Divides core input data into manageable blocks, each representing
|
||||
* a subsection of data that is small enough to be processed by a single
|
||||
* worker.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 2:39:05 PM
|
||||
* <p/>
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
/** This class is the interface for all data sources */
|
||||
public interface SimpleDataSource extends Serializable {
|
||||
|
||||
|
||||
/**
|
||||
* Query the data source for a region of interest, specified by the genome location.
|
||||
* The iterator will generate successive calls
|
||||
*
|
||||
* @param shard the region
|
||||
* @return an iterator of the appropriate type, that is limited by the region
|
||||
*/
|
||||
public Iterator seek(Shard shard);
|
||||
|
||||
}
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
/**
|
||||
* Retrieves core GATK data, given a shard which represents a contiguous
|
||||
* subsequence of core GATK data (reads, reference, or reference-ordered
|
||||
* data).
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
|
@ -1,20 +1,17 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.io.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
|
||||
|
||||
import javax.management.MBeanServer;
|
||||
import javax.management.ObjectName;
|
||||
import javax.management.JMException;
|
||||
import java.util.Queue;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Collection;
|
||||
|
|
@ -22,7 +19,6 @@ import java.util.concurrent.Executors;
|
|||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.FutureTask;
|
||||
import java.lang.management.ManagementFactory;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
|
||||
|
|
|
|||
|
|
@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.executive;
|
|||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||
|
|
|
|||
|
|
@ -26,10 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.traversals.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
|
|
@ -40,13 +40,11 @@ import org.broadinstitute.sting.gatk.ReadMetrics;
|
|||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.threading.*;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.executive;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
|
|
|||
|
|
@ -1,12 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.apache.log4j.Logger;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMRecordCoordinateComparator;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broad.tribble.vcf.*;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import org.broad.tribble.vcf.*;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature;
|
||||
|
|
|
|||
|
|
@ -33,10 +33,9 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broad.tribble.vcf.*;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
|
||||
|
|
@ -38,13 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
|
@ -45,7 +46,6 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
|
|||
|
|
@ -33,10 +33,10 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broad.tribble.vcf.VCFCodec;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import java.util.regex.Pattern;
|
|||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broad.tribble.vcf.*;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.Hidden;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
|
|
|
|||
|
|
@ -31,10 +31,8 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.oneoffprojects.utils.ReadPair;
|
||||
import org.broadinstitute.sting.oneoffprojects.utils.AlignmentInfo;
|
||||
import org.broadinstitute.sting.oneoffprojects.utils.Assembly;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
|
|||
|
|
@ -7,9 +7,8 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
|
|
|
|||
|
|
@ -3,10 +3,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.interval;
|
|||
|
||||
import net.sf.picard.util.IntervalList;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
|
|||
|
|
@ -25,14 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.vcf;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package net.sf.picard.reference;
|
|||
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,18 +1,16 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
|
|
|
|||
|
|
@ -22,11 +22,12 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -1,13 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import static org.testng.Assert.fail;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -22,7 +24,6 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
|
|
@ -19,7 +18,6 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Collections;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
/**
|
||||
|
|
@ -12,7 +12,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
|
|
|||
|
|
@ -8,11 +8,11 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -31,7 +31,6 @@ import java.io.FileOutputStream;
|
|||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -119,7 +118,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
SAMDataSource dataSource = new SAMDataSource(bamList,genomeLocParser);
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref, ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
|
||||
ref.getSequenceDictionary(),
|
||||
readSize,
|
||||
genomeLocParser);
|
||||
|
|
|
|||
Loading…
Reference in New Issue