Convert GenomeLocParser into an instance variable. This change is required

for anything that needs to be simultaneously aware of multiple references, e.g.,
Queue's interval sharding code, liftover support, distributed GATK, etc.

GenomeLocParser instances must now be used to create/parse GenomeLocs.
GenomeLocParser instances are available in walkers by calling either

  - getToolkit().getGenomeLocParser(), or
  - refContext.getGenomeLocParser()

This is an intermediate change; GenomeLocParser will eventually be merged
with the reference, but we're not clear exactly how to do that yet.  This
will become clearer when contig aliasing is implemented.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4642 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-11-10 17:59:50 +00:00
parent 760f06cf8c
commit 8e36a07bea
135 changed files with 1305 additions and 1317 deletions

View File

@ -48,7 +48,6 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
@ -69,6 +68,11 @@ public abstract class AbstractGenomeAnalysisEngine {
*/
private ParsingEngine parsingEngine;
/**
* The genomeLocParser can create and parse GenomeLocs.
*/
private GenomeLocParser genomeLocParser;
/**
* Accessor for sharded read data.
*/
@ -82,6 +86,10 @@ public abstract class AbstractGenomeAnalysisEngine {
return referenceDataSource;
}
public GenomeLocParser getGenomeLocParser() {
return genomeLocParser;
}
/**
* Accessor for sharded reference data.
*/
@ -136,6 +144,14 @@ public abstract class AbstractGenomeAnalysisEngine {
this.parsingEngine = parsingEngine;
}
/**
* Explicitly set the GenomeLocParser, for unit testing.
* @param genomeLocParser GenomeLocParser to use.
*/
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
/**
* Actually run the engine.
* @return the value of this traversal.
@ -188,7 +204,7 @@ public abstract class AbstractGenomeAnalysisEngine {
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
loadIntervals(argCollection.intervals,
argCollection.intervalMerging,
GenomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging)));
genomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging)));
// if no exclude arguments, can return parseIntervalArguments directly
if (argCollection.excludeIntervals == null)
@ -221,11 +237,11 @@ public abstract class AbstractGenomeAnalysisEngine {
IntervalMergingRule mergingRule,
List<GenomeLoc> additionalIntervals) {
return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals,
IntervalUtils.parseIntervalArguments(argList,
this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST),
argCollection.BTIMergeRule),
mergingRule);
return IntervalUtils.sortAndMergeIntervals(genomeLocParser,IntervalUtils.mergeListsBySetOperator(additionalIntervals,
IntervalUtils.parseIntervalArguments(genomeLocParser,argList,
this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST),
argCollection.BTIMergeRule),
mergingRule);
}
/**
@ -298,22 +314,22 @@ public abstract class AbstractGenomeAnalysisEngine {
protected void initializeDataSources() {
logger.info("Strictness is " + argCollection.strictnessLevel);
validateSuppliedReference();
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
validateSuppliedReads();
readsDataSource = createReadsDataSource();
readsDataSource = createReadsDataSource(genomeLocParser);
for (SamRecordFilter filter : filters)
if (filter instanceof SamRecordHeaderFilter)
((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader());
validateSuppliedReference();
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
RMDTrackBuilder manager = new RMDTrackBuilder();
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser);
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
validateSuppliedReferenceOrderedData(tracks);
@ -330,7 +346,7 @@ public abstract class AbstractGenomeAnalysisEngine {
* @return A unique identifier for the source file of this read. Exception if not found.
*/
public SAMReaderID getReaderIDForRead(final SAMRecord read) {
return getDataSource().getReaderID(read);
return getReadsDataSource().getReaderID(read);
}
/**
@ -339,7 +355,7 @@ public abstract class AbstractGenomeAnalysisEngine {
* @return The source filename for this read.
*/
public File getSourceFileForReaderID(final SAMReaderID id) {
return getDataSource().getSAMFile(id);
return getReadsDataSource().getSAMFile(id);
}
/**
@ -351,7 +367,7 @@ public abstract class AbstractGenomeAnalysisEngine {
* @return Sets of samples in the merged input SAM stream, grouped by readers
*/
public List<Set<String>> getSamplesByReaders() {
List<SAMReaderID> readers = getDataSource().getReaderIDs();
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
@ -360,7 +376,7 @@ public abstract class AbstractGenomeAnalysisEngine {
Set<String> samples = new HashSet<String>(1);
sample_sets.add(samples);
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
samples.add(g.getSample());
}
}
@ -380,7 +396,7 @@ public abstract class AbstractGenomeAnalysisEngine {
public List<Set<String>> getLibrariesByReaders() {
List<SAMReaderID> readers = getDataSource().getReaderIDs();
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
@ -389,7 +405,7 @@ public abstract class AbstractGenomeAnalysisEngine {
Set<String> libs = new HashSet<String>(2);
lib_sets.add(libs);
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
libs.add(g.getLibrary());
}
}
@ -406,22 +422,22 @@ public abstract class AbstractGenomeAnalysisEngine {
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
// populate the file -> read group mapping
Map<File, Set<String>> fileToReadGroupIdMap = new HashMap<File, Set<String>>();
for (SAMReaderID id: getDataSource().getReaderIDs()) {
for (SAMReaderID id: getReadsDataSource().getReaderIDs()) {
Set<String> readGroups = new HashSet<String>(5);
for (SAMReadGroupRecord g : getDataSource().getHeader(id).getReadGroups()) {
if (getDataSource().hasReadGroupCollisions()) {
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(id).getReadGroups()) {
if (getReadsDataSource().hasReadGroupCollisions()) {
// Check if there were read group clashes.
// If there were, use the SamFileHeaderMerger to translate from the
// original read group id to the read group id in the merged stream
readGroups.add(getDataSource().getReadGroupId(id,g.getReadGroupId()));
readGroups.add(getReadsDataSource().getReadGroupId(id,g.getReadGroupId()));
} else {
// otherwise, pass through the unmapped read groups since this is what Picard does as well
readGroups.add(g.getReadGroupId());
}
}
fileToReadGroupIdMap.put(getDataSource().getSAMFile(id),readGroups);
fileToReadGroupIdMap.put(getReadsDataSource().getSAMFile(id),readGroups);
}
return fileToReadGroupIdMap;
@ -440,7 +456,7 @@ public abstract class AbstractGenomeAnalysisEngine {
public List<Set<String>> getMergedReadGroupsByReaders() {
List<SAMReaderID> readers = getDataSource().getReaderIDs();
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
@ -449,11 +465,11 @@ public abstract class AbstractGenomeAnalysisEngine {
Set<String> groups = new HashSet<String>(5);
rg_sets.add(groups);
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
if (getDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
// use HeaderMerger to translate original read group id from the reader into the read group id in the
// merged stream, and save that remapped read group id to associate it with specific reader
groups.add(getDataSource().getReadGroupId(r, g.getReadGroupId()));
groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
} else {
// otherwise, pass through the unmapped read groups since this is what Picard does as well
groups.add(g.getReadGroupId());
@ -533,29 +549,17 @@ public abstract class AbstractGenomeAnalysisEngine {
}
/**
* Convenience function that binds RODs using the old-style command line parser to the new style list for
* a uniform processing.
*
* @param name the name of the rod
* @param type its type
* @param file the file to load the rod from
*/
private void bindConvenienceRods(final String name, final String type, final String file) {
argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file}));
}
/**
* Gets a data source for the given set of reads.
*
* @return A data source for the given set of reads.
*/
private SAMDataSource createReadsDataSource() {
private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser) {
DownsamplingMethod method = getDownsamplingMethod();
return new SAMDataSource(
unpackBAMFileList(argCollection.samFiles),
genomeLocParser,
argCollection.useOriginalBaseQualities,
argCollection.strictnessLevel,
argCollection.readBufferSize,
@ -574,7 +578,7 @@ public abstract class AbstractGenomeAnalysisEngine {
*/
private ReferenceDataSource openReferenceSequenceFile(File refFile) {
ReferenceDataSource ref = new ReferenceDataSource(refFile);
GenomeLocParser.setupRefContigOrdering(ref.getReference());
genomeLocParser = new GenomeLocParser(ref.getReference());
return ref;
}
@ -587,7 +591,7 @@ public abstract class AbstractGenomeAnalysisEngine {
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTrack rod : rods)
dataSources.add(new ReferenceOrderedDataSource(rod, flashbackData()));
dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,rod,flashbackData()));
return dataSources;
}
@ -614,10 +618,12 @@ public abstract class AbstractGenomeAnalysisEngine {
*
* @return the reads data source
*/
public SAMDataSource getDataSource() {
public SAMDataSource getReadsDataSource() {
return this.readsDataSource;
}
/**
* Sets the collection of GATK main application arguments.
*

View File

@ -165,7 +165,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
throw new UserException.CommandLineException("Read-based traversals require a reference file but none was given");
}
return MicroScheduler.create(this,my_walker,this.getDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads);
return MicroScheduler.create(this,my_walker,this.getReadsDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads);
}
@Override
@ -258,7 +258,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
*/
protected ShardStrategy getShardStrategy(ReferenceSequenceFile drivingDataSource) {
GenomeLocSortedSet intervals = this.getIntervals();
SAMDataSource readsDataSource = this.getDataSource();
SAMDataSource readsDataSource = this.getReadsDataSource();
ValidationExclusion exclusions = (readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
ReferenceDataSource referenceDataSource = this.getReferenceDataSource();
// Use monolithic sharding if no index is present. Monolithic sharding is always required for the original
@ -286,7 +286,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
else {
region = new ArrayList<GenomeLoc>();
for(SAMSequenceRecord sequenceRecord: drivingDataSource.getSequenceDictionary().getSequences())
region.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
region.add(getGenomeLocParser().createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
}
return new MonolithicShardStrategy(readsDataSource,shardType,region);
@ -309,13 +309,14 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
getGenomeLocParser(),
intervals);
} else
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
referenceDataSource.getReference(),
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE);
SHARD_SIZE,getGenomeLocParser());
} else if (walker instanceof ReadWalker ||
walker instanceof DuplicateWalker) {
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL;
@ -326,13 +327,15 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
getGenomeLocParser(),
intervals);
} else {
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
referenceDataSource.getReference(),
shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE);
SHARD_SIZE,
getGenomeLocParser());
}
} else if (walker instanceof ReadPairWalker) {
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
@ -344,7 +347,8 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
referenceDataSource.getReference(),
ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE);
SHARD_SIZE,
getGenomeLocParser());
} else
throw new ReviewedStingException("Unable to support walker of type" + walker.getClass().getName());

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.contexts;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.BaseUtils;
@ -41,6 +42,11 @@ import net.sf.samtools.util.StringUtil;
public class ReferenceContext {
final public static boolean UPPERCASE_REFERENCE = true;
/**
* Facilitates creation of new GenomeLocs.
*/
private GenomeLocParser genomeLocParser;
/**
* The locus.
*/
@ -101,18 +107,18 @@ public class ReferenceContext {
* @param locus locus of interest.
* @param base reference base at that locus.
*/
public ReferenceContext( GenomeLoc locus, byte base ) {
this( locus, locus, new ForwardingProvider(base) );
public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, byte base ) {
this( genomeLocParser, locus, locus, new ForwardingProvider(base) );
}
public ReferenceContext( GenomeLoc locus, GenomeLoc window, byte[] bases ) {
this( locus, window, new ForwardingProvider(bases) );
public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, byte[] bases ) {
this( genomeLocParser, locus, window, new ForwardingProvider(bases) );
}
public ReferenceContext( GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) {
public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) {
// if( !window.containsP(locus) )
// throw new StingException("Invalid locus or window; window does not contain locus");
this.genomeLocParser = genomeLocParser;
this.locus = locus;
this.window = window;
this.basesProvider = basesProvider;
@ -125,6 +131,10 @@ public class ReferenceContext {
}
}
public GenomeLocParser getGenomeLocParser() {
return genomeLocParser;
}
/**
* The locus currently being examined.
* @return The current locus.

View File

@ -219,8 +219,8 @@ public class VariantContextUtils {
* @param exp expression
* @return true if there is a match
*/
public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
return match(vc,Arrays.asList(exp)).get(exp);
public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, JexlVCMatchExp exp) {
return match(genomeLocParser,vc,Arrays.asList(exp)).get(exp);
}
/**
@ -233,8 +233,8 @@ public class VariantContextUtils {
* @param exps expressions
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Collection<JexlVCMatchExp> exps) {
return new JEXLMap(exps,vc);
public static Map<JexlVCMatchExp, Boolean> match(GenomeLocParser genomeLocParser,VariantContext vc, Collection<JexlVCMatchExp> exps) {
return new JEXLMap(genomeLocParser,exps,vc);
}
@ -245,8 +245,8 @@ public class VariantContextUtils {
* @param exp expression
* @return true if there is a match
*/
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
return match(vc,g,Arrays.asList(exp)).get(exp);
public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, JexlVCMatchExp exp) {
return match(genomeLocParser,vc,g,Arrays.asList(exp)).get(exp);
}
/**
@ -260,8 +260,8 @@ public class VariantContextUtils {
* @param exps expressions
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
return new JEXLMap(exps,vc,g);
public static Map<JexlVCMatchExp, Boolean> match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
return new JEXLMap(genomeLocParser,exps,vc,g);
}
@ -306,8 +306,8 @@ public class VariantContextUtils {
UNION, INTERSECT
}
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, byte refBase) {
return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
}
@ -322,14 +322,14 @@ public class VariantContextUtils {
* @param genotypeMergeOptions
* @return
*/
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false);
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false);
}
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
boolean filteredAreUncalled ) {
@ -357,7 +357,7 @@ public class VariantContextUtils {
// establish the baseline info from the first VC
VariantContext first = VCs.get(0);
String name = first.getSource();
GenomeLoc loc = getLocation(first);
GenomeLoc loc = getLocation(genomeLocParser,first);
Set<Allele> alleles = new TreeSet<Allele>();
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
@ -380,8 +380,8 @@ public class VariantContextUtils {
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
if ( getLocation(vc).size() > loc.size() )
loc = getLocation(vc); // get the longest location
if ( getLocation(genomeLocParser,vc).size() > loc.size() )
loc = getLocation(genomeLocParser,vc); // get the longest location
nFiltered += vc.isFiltered() ? 1 : 0;
nVariant += vc.isVariant() ? 1 : 0;
@ -753,13 +753,13 @@ public class VariantContextUtils {
* @param vc the variant context
* @return the genomeLoc
*/
public static final GenomeLoc getLocation(VariantContext vc) {
return GenomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd());
public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) {
return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd());
}
// NOTE: returns null if vc1 and vc2 are not mergeable into a single MNP record
public static VariantContext mergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
if (!mergeIntoMNPvalidationCheck(vc1, vc2))
public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
return null;
// Check that it's logically possible to merge the VCs, and that there's a point in doing so (e.g., annotations could be changed):
@ -974,9 +974,9 @@ public class VariantContextUtils {
}
}
private static boolean mergeIntoMNPvalidationCheck(VariantContext vc1, VariantContext vc2) {
GenomeLoc loc1 = VariantContextUtils.getLocation(vc1);
GenomeLoc loc2 = VariantContextUtils.getLocation(vc2);
private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2) {
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser,vc1);
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser,vc2);
if (!loc1.onSameContig(loc2))
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");

View File

@ -27,6 +27,7 @@ import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.MapContext;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -49,6 +50,7 @@ import java.util.*;
*/
class VariantJEXLContext implements JexlContext {
private GenomeLocParser genomeLocParser;
// our stored variant context
private VariantContext vc;
@ -73,7 +75,8 @@ class VariantJEXLContext implements JexlContext {
x.put("homVarCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHomVarCount(); }});
}
public VariantJEXLContext(VariantContext vc) {
public VariantJEXLContext(GenomeLocParser genomeLocParser,VariantContext vc) {
this.genomeLocParser = genomeLocParser;
this.vc = vc;
}
@ -119,6 +122,7 @@ class VariantJEXLContext implements JexlContext {
*/
class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
private final GenomeLocParser genomeLocParser;
// our variant context and/or Genotype
private final VariantContext vc;
private final Genotype g;
@ -130,18 +134,19 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
private Map<VariantContextUtils.JexlVCMatchExp,Boolean> jexl;
public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc, Genotype g) {
public JEXLMap(GenomeLocParser genomeLocParser,Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc, Genotype g) {
this.genomeLocParser = genomeLocParser;
this.vc = vc;
this.g = g;
initialize(jexlCollection);
}
public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc) {
this(jexlCollection, vc, null);
public JEXLMap(GenomeLocParser genomeLocParser,Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc) {
this(genomeLocParser,jexlCollection, vc, null);
}
public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, Genotype g) {
this(jexlCollection, null, g);
public JEXLMap(GenomeLocParser genomeLocParser,Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, Genotype g) {
this(genomeLocParser,jexlCollection, null, g);
}
private void initialize(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection) {
@ -159,14 +164,14 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
private void createContext() {
if ( g == null ) {
// todo -- remove dependancy on g to the entire system
jContext = new VariantJEXLContext(vc);
jContext = new VariantJEXLContext(genomeLocParser,vc);
} else {
Map<String, Object> infoMap = new HashMap<String, Object>();
if ( vc != null ) {
// create a mapping of what we know about the variant context, its Chromosome, positions, etc.
infoMap.put("CHROM", VariantContextUtils.getLocation(vc).getContig());
infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(vc).getStart()));
infoMap.put("CHROM", VariantContextUtils.getLocation(genomeLocParser,vc).getContig());
infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(genomeLocParser,vc).getStart()));
infoMap.put("TYPE", vc.getType().toString());
infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
/**
* User: hanna
@ -47,7 +48,7 @@ public class AllLocusView extends LocusView {
public AllLocusView(LocusShardDataProvider provider) {
super( provider );
// Seed the state tracking members with the first possible seek position and the first possible locus context.
locusIterator = new GenomeLocusIterator(provider.getLocus());
locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus());
if( locusIterator.hasNext() ) {
// cache next position and next alignment context
nextPosition = locusIterator.next();

View File

@ -97,9 +97,9 @@ public class LocusReferenceView extends ReferenceView {
}
if(bounds != null) {
long expandedStart = getWindowStart( bounds );
long expandedStop = getWindowStop( bounds );
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
int expandedStart = getWindowStart( bounds );
int expandedStop = getWindowStop( bounds );
initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
}
}
@ -123,12 +123,12 @@ public class LocusReferenceView extends ReferenceView {
if ( loc.getContigIndex() != bounds.getContigIndex() )
throw new ReviewedStingException("Illegal attempt to expand reference view bounds to accommodate location on a different contig.");
bounds = GenomeLocParser.createGenomeLoc(bounds.getContigIndex(),
bounds = genomeLocParser.createGenomeLoc(bounds.getContig(),
Math.min(bounds.getStart(),loc.getStart()),
Math.max(bounds.getStop(),loc.getStop()));
long expandedStart = getWindowStart( bounds );
long expandedStop = getWindowStop( bounds );
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
int expandedStart = getWindowStart( bounds );
int expandedStop = getWindowStop( bounds );
initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
}
/**
@ -137,8 +137,8 @@ public class LocusReferenceView extends ReferenceView {
*/
private void initializeBounds(LocusShardDataProvider provider) {
if(provider.getLocus() != null) {
long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength();
bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(),
int sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength();
bounds = genomeLocParser.createGenomeLoc(provider.getLocus().getContig(),
Math.max(provider.getLocus().getStart(),1),
Math.min(provider.getLocus().getStop(),sequenceLength));
}
@ -155,10 +155,10 @@ public class LocusReferenceView extends ReferenceView {
}
protected GenomeLoc trimToBounds(GenomeLoc l) {
long expandedStart = getWindowStart( bounds );
long expandedStop = getWindowStop( bounds );
if ( l.getStart() < expandedStart ) l = GenomeLocParser.setStart(l, expandedStart);
if ( l.getStop() > expandedStop ) l = GenomeLocParser.setStop(l, expandedStop);
int expandedStart = getWindowStart( bounds );
int expandedStop = getWindowStop( bounds );
if ( l.getStart() < expandedStart ) l = genomeLocParser.setStart(l, expandedStart);
if ( l.getStop() > expandedStop ) l = genomeLocParser.setStop(l, expandedStop);
return l;
}
@ -186,7 +186,7 @@ public class LocusReferenceView extends ReferenceView {
public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) {
//validateLocation( genomeLoc );
GenomeLoc window = GenomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
int refStart = -1;
if (bounds != null) {
@ -200,7 +200,7 @@ public class LocusReferenceView extends ReferenceView {
}
int len = (int)window.size();
return new ReferenceContext( genomeLoc, window, new Provider(refStart, len));
return new ReferenceContext( genomeLocParser, genomeLoc, window, new Provider(refStart, len));
}
/**
@ -228,7 +228,7 @@ public class LocusReferenceView extends ReferenceView {
* @param locus The locus to expand.
* @return The expanded window.
*/
private long getWindowStart( GenomeLoc locus ) {
private int getWindowStart( GenomeLoc locus ) {
// If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can.
if(locus.getStart() < 1) return 1;
// if(locus.getStart() < 1) return locus.getStart();
@ -240,9 +240,9 @@ public class LocusReferenceView extends ReferenceView {
* @param locus The locus to expand.
* @return The expanded window.
*/
private long getWindowStop( GenomeLoc locus ) {
private int getWindowStop( GenomeLoc locus ) {
// If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can.
long sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength();
int sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength();
if(locus.getStop() > sequenceLength) return sequenceLength;
return Math.min( locus.getStop() + windowStop, sequenceLength );
}

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.ReadProperties;
import java.util.Collection;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
* Presents data sharded by locus to the traversal engine.
@ -22,6 +23,11 @@ public class LocusShardDataProvider extends ShardDataProvider {
*/
private final ReadProperties sourceInfo;
/**
* The parser, used to create and build new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
/**
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
*/
@ -37,9 +43,10 @@ public class LocusShardDataProvider extends ShardDataProvider {
* @param shard The chunk of data over which traversals happen.
* @param reference A getter for a section of the reference.
*/
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,reference,rods);
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,genomeLocParser,reference,rods);
this.sourceInfo = sourceInfo;
this.genomeLocParser = genomeLocParser;
this.locus = locus;
this.locusIterator = locusIterator;
}

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Arrays;
import java.util.Collection;
@ -33,6 +34,11 @@ public abstract class LocusView extends LocusIterator implements View {
*/
protected GenomeLoc locus;
/**
* The GenomeLocParser, used to create new genome locs.
*/
protected GenomeLocParser genomeLocParser;
/**
* Source info for this view. Informs the class about downsampling requirements.
*/
@ -53,6 +59,7 @@ public abstract class LocusView extends LocusIterator implements View {
this.locus = provider.getLocus();
this.sourceInfo = provider.getSourceInfo();
this.genomeLocParser = provider.getGenomeLocParser();
this.loci = provider.getLocusIterator();
seedNextLocus();

View File

@ -76,7 +76,7 @@ public class ReadBasedReferenceOrderedView implements View {
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
class WindowedData {
// the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope
private final TreeMap<Long, RODMetaDataContainer> mapping = new TreeMap<Long, RODMetaDataContainer>();
private final TreeMap<Integer, RODMetaDataContainer> mapping = new TreeMap<Integer, RODMetaDataContainer>();
// our current location from the last read we processed
private GenomeLoc currentLoc;
@ -109,16 +109,16 @@ class WindowedData {
*/
private void getStates(ShardDataProvider provider, SAMRecord rec) {
long stop = Integer.MAX_VALUE;
int stop = Integer.MAX_VALUE;
// figure out the appropriate alignment stop
if (provider.hasReference()) {
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
}
// calculate the range of positions we need to look at
GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(),
rec.getAlignmentStart(),
stop);
GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(),
rec.getAlignmentStart(),
stop);
states = new ArrayList<RMDDataState>();
if (provider != null && provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
@ -144,7 +144,7 @@ class WindowedData {
*/
public ReadMetaDataTracker getTracker(SAMRecord rec) {
updatePosition(rec);
return new ReadMetaDataTracker(rec, mapping);
return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping);
}
/**
@ -154,7 +154,7 @@ class WindowedData {
*/
private void updatePosition(SAMRecord rec) {
if (states == null) getStates(this.provider, rec);
currentLoc = GenomeLocParser.createGenomeLoc(rec);
currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec);
// flush the queue looking for records we've passed over
while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())

View File

@ -67,10 +67,10 @@ public class ReadReferenceView extends ReferenceView {
}
public ReferenceContext getReferenceContext( SAMRecord read ) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
// byte[] bases = super.getReferenceBases(loc);
// return new ReferenceContext( loc, loc, bases );
return new ReferenceContext( loc, loc, getReferenceBasesProvider(loc) );
return new ReferenceContext( genomeLocParser, loc, loc, getReferenceBasesProvider(loc) );
}
}

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collection;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
* Present data sharded by read to a traversal engine.
@ -26,8 +27,8 @@ public class ReadShardDataProvider extends ShardDataProvider {
* @param shard The chunk of data over which traversals happen.
* @param reference A getter for a section of the reference.
*/
public ReadShardDataProvider(Shard shard, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,reference,rods);
public ReadShardDataProvider(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,genomeLocParser,reference,rods);
this.reads = reads;
}

View File

@ -28,6 +28,11 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
* A view into the reference backing this shard.
*/
public class ReferenceView implements View {
/**
* The parser, used to create and parse GenomeLocs.
*/
protected final GenomeLocParser genomeLocParser;
/**
* The source of reference data.
*/
@ -38,6 +43,7 @@ public class ReferenceView implements View {
* @param provider
*/
public ReferenceView( ShardDataProvider provider ) {
this.genomeLocParser = provider.getGenomeLocParser();
this.reference = provider.getReference();
}
@ -68,7 +74,7 @@ public class ReferenceView implements View {
}
protected byte[] getReferenceBases( SAMRecord read ) {
return getReferenceBases(GenomeLocParser.createGenomeLoc(read));
return getReferenceBases(genomeLocParser.createGenomeLoc(read));
}

View File

@ -80,7 +80,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
// next() to return the first real ROD in this shard
LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard());
it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1));
it.seekForward(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart()-1));
states.add(new ReferenceOrderedDataState(dataSource,it));
@ -128,7 +128,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
tracker = createTracker(allTracksHere);
GenomeLoc rodSite = datum.getLocation();
GenomeLoc site = GenomeLocParser.createGenomeLoc( rodSite.getContigIndex(), rodSite.getStart(), rodSite.getStart());
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
if ( DEBUG ) System.out.printf("rodLocusView.next() is at %s%n", site);
@ -167,7 +167,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
*/
private long getSkippedBases( GenomeLoc currentPos ) {
// the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped
Long compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop();
Integer compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop();
long skippedBases = currentPos.getStart() - compStop - 1;
if ( skippedBases < -1 ) { // minus 1 value is ok
@ -182,7 +182,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
* @return
*/
public GenomeLoc getLocOneBeyondShard() {
return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStop()+1);
return genomeLocParser.createGenomeLoc(locus.getContig(),locus.getStop()+1);
}
/**

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
@ -37,6 +38,11 @@ public abstract class ShardDataProvider {
*/
private final Shard shard;
/**
* The parser, used to create and build new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
/**
* Provider of reference data for this particular shard.
*/
@ -47,6 +53,14 @@ public abstract class ShardDataProvider {
*/
private final Collection<ReferenceOrderedDataSource> referenceOrderedData;
/**
* Returns the GenomeLocParser associated with this traversal.
* @return The associated parser.
*/
public GenomeLocParser getGenomeLocParser() {
return genomeLocParser;
}
/**
* Retrieves the shard associated with this data provider.
* @return The shard associated with this data provider.
@ -86,8 +100,9 @@ public abstract class ShardDataProvider {
* @param shard The chunk of data over which traversals happen.
* @param reference A getter for a section of the reference.
*/
public ShardDataProvider(Shard shard,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
public ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
this.shard = shard;
this.genomeLocParser = genomeLocParser;
this.reference = reference;
this.referenceOrderedData = rods;
}
@ -96,8 +111,8 @@ public abstract class ShardDataProvider {
* Skeletal, package protected constructor for unit tests which require a ShardDataProvider.
* @param shard the shard
*/
ShardDataProvider(Shard shard) {
this(shard,null,null);
ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser) {
this(shard,genomeLocParser,null,null);
}
/**

View File

@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
public class IntervalSharder {
private static Logger logger = Logger.getLogger(IntervalSharder.class);
public static Iterator<FilePointer> shardIntervals(final SAMDataSource dataSource, final List<GenomeLoc> loci) {
public static Iterator<FilePointer> shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
return new FilePointerIterator(dataSource,loci);
}
@ -55,11 +55,13 @@ public class IntervalSharder {
*/
private static class FilePointerIterator implements Iterator<FilePointer> {
final SAMDataSource dataSource;
final GenomeLocSortedSet loci;
final PeekableIterator<GenomeLoc> locusIterator;
final Queue<FilePointer> cachedFilePointers = new LinkedList<FilePointer>();
public FilePointerIterator(final SAMDataSource dataSource, final List<GenomeLoc> loci) {
public FilePointerIterator(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
this.dataSource = dataSource;
this.loci = loci;
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
advance();
}
@ -82,7 +84,7 @@ public class IntervalSharder {
}
private void advance() {
List<GenomeLoc> nextBatch = new ArrayList<GenomeLoc>();
GenomeLocSortedSet nextBatch = new GenomeLocSortedSet(loci.getGenomeLocParser());
String contig = null;
while(locusIterator.hasNext() && nextBatch.isEmpty()) {
@ -99,7 +101,7 @@ public class IntervalSharder {
}
}
private static List<FilePointer> shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final List<GenomeLoc> loci) {
private static List<FilePointer> shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final GenomeLocSortedSet loci) {
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
List<FilePointer> filePointers = new ArrayList<FilePointer>();
FilePointer lastFilePointer = null;
@ -171,7 +173,7 @@ public class IntervalSharder {
final int regionStop = Math.min(locationStop,binStart-1);
GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop);
GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop);
lastFilePointer = new FilePointer(subset);
locationStart = regionStop + 1;
@ -184,7 +186,7 @@ public class IntervalSharder {
lastBAMOverlap = null;
}
GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,locationStop);
GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,locationStop);
filePointers.add(new FilePointer(subset));
locationStart = locationStop + 1;
@ -195,7 +197,7 @@ public class IntervalSharder {
// The start of the region overlaps the bin. Add the overlapping subset.
final int regionStop = Math.min(locationStop,binStop);
lastFilePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop));
lastFilePointer.addLocation(loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop));
locationStart = regionStop + 1;
}
}

View File

@ -58,14 +58,14 @@ public class LocusShardStrategy implements ShardStrategy {
* @param reads Data source from which to load index data.
* @param locations List of locations for which to load data.
*/
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocSortedSet locations) {
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) {
this.reads = reads;
if(!reads.isEmpty()) {
List<GenomeLoc> intervals;
GenomeLocSortedSet intervals;
if(locations == null) {
// If no locations were passed in, shard the entire BAM file.
SAMFileHeader header = reads.getHeader();
intervals = new ArrayList<GenomeLoc>();
intervals = new GenomeLocSortedSet(genomeLocParser);
for(SAMSequenceRecord readsSequenceRecord: header.getSequenceDictionary().getSequences()) {
// Check this sequence against the reference sequence dictionary.
@ -73,12 +73,12 @@ public class LocusShardStrategy implements ShardStrategy {
SAMSequenceRecord refSequenceRecord = reference.getSequenceDictionary().getSequence(readsSequenceRecord.getSequenceName());
if(refSequenceRecord != null) {
final int length = Math.min(readsSequenceRecord.getSequenceLength(),refSequenceRecord.getSequenceLength());
intervals.add(GenomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length));
intervals.add(genomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length));
}
}
}
else
intervals = locations.toList();
intervals = locations;
this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals);
}
@ -89,15 +89,15 @@ public class LocusShardStrategy implements ShardStrategy {
for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) {
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)));
filePointers.add(new FilePointer(genomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)));
}
}
}
else {
for(GenomeLoc interval: locations) {
while(interval.size() > maxShardSize) {
filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)));
interval = GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
filePointers.add(new FilePointer(locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)));
interval = locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
}
filePointers.add(new FilePointer(interval));
}

View File

@ -90,7 +90,7 @@ public class ReadShardStrategy implements ShardStrategy {
this.locations = locations;
if(locations != null)
filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations.toList());
filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations);
else
filePointerIterator = filePointers.iterator();

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
@ -50,8 +51,8 @@ public class ShardStrategyFactory {
* @param startingSize the starting size
* @return a shard strategy capable of dividing input data into shards.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, -1L);
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser) {
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, -1L);
}
/**
@ -64,10 +65,10 @@ public class ShardStrategyFactory {
* @param startingSize the starting size
* @return a shard strategy capable of dividing input data into shards.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) {
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, long limitByCount) {
switch (strat) {
case LOCUS_EXPERIMENTAL:
return new LocusShardStrategy(readsDataSource,referenceDataSource,null);
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null);
case READS_EXPERIMENTAL:
return new ReadShardStrategy(readsDataSource,null);
default:
@ -87,8 +88,8 @@ public class ShardStrategyFactory {
* @param startingSize the starting size
* @return a shard strategy capable of dividing input data into shards.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) {
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, lst, -1l);
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst) {
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, lst, -1l);
}
@ -102,10 +103,10 @@ public class ShardStrategyFactory {
* @param startingSize the starting size
* @return A strategy for shattering this data.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) {
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst, long limitDataCount) {
switch (strat) {
case LOCUS_EXPERIMENTAL:
return new LocusShardStrategy(readsDataSource,referenceDataSource,lst);
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst);
case READS_EXPERIMENTAL:
return new ReadShardStrategy(readsDataSource,lst);
default:

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -47,12 +49,12 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* Create a new reference-ordered data source.
* @param rod the reference ordered data
*/
public ReferenceOrderedDataSource( RMDTrack rod, boolean flashbackData ) {
public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) {
this.rod = rod;
if (rod.supportsQuery())
iteratorPool = new ReferenceOrderedQueryDataPool(new RMDTrackBuilder(),rod);
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,genomeLocParser,new RMDTrackBuilder(),rod);
else
iteratorPool = new ReferenceOrderedDataPool( rod, flashbackData );
iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData );
}
/**
@ -110,7 +112,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
private final RMDTrack rod;
boolean flashbackData = false;
public ReferenceOrderedDataPool( RMDTrack rod, boolean flashbackData ) {
public ReferenceOrderedDataPool( SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) {
super(sequenceDictionary,genomeLocParser);
this.flashbackData = flashbackData;
this.rod = rod;
}
@ -121,7 +124,7 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
* @return The newly created resource.
*/
public LocationAwareSeekableRODIterator createNewResource() {
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(rod.getIterator());
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(sequenceDictionary,genomeLocParser,rod.getIterator());
return (flashbackData) ? new FlashBackIterator(iter) : iter;
}
@ -134,7 +137,7 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
*/
public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
if(segment instanceof MappedStreamSegment) {
GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation();
GenomeLoc position = ((MappedStreamSegment)segment).getLocation();
for( LocationAwareSeekableRODIterator RODIterator : resources ) {
@ -178,14 +181,14 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
* a data pool for the new query based RODs
*/
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, LocationAwareSeekableRODIterator> {
// the reference-ordered data itself.
private final RMDTrack rod;
// our tribble track builder
private final RMDTrackBuilder builder;
public ReferenceOrderedQueryDataPool( RMDTrackBuilder builder, RMDTrack rod ) {
public ReferenceOrderedQueryDataPool( SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, RMDTrackBuilder builder, RMDTrack rod ) {
super(sequenceDictionary,genomeLocParser);
this.rod = rod;
this.builder = builder;
// a little bit of a hack, but it saves us from re-reading the index from the file
@ -209,9 +212,9 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, Location
try {
if (position instanceof MappedStreamSegment) {
GenomeLoc pos = ((MappedStreamSegment) position).locus;
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName()));
return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName()));
} else {
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.iterator(),rod.getName()));
return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.iterator(),rod.getName()));
}
} catch (IOException e) {
throw new ReviewedStingException("Unable to create iterator for rod named " + rod.getName(),e);

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -26,6 +27,16 @@ import java.util.Map;
* A pool of open resources, all of which can create a closeable iterator.
*/
abstract class ResourcePool <T,I extends Iterator> {
/**
* Sequence dictionary.
*/
protected final SAMSequenceDictionary sequenceDictionary;
/**
* Builder/parser for GenomeLocs.
*/
protected final GenomeLocParser genomeLocParser;
/**
* All iterators of this reference-ordered data.
*/
@ -41,6 +52,11 @@ abstract class ResourcePool <T,I extends Iterator> {
*/
private Map<I,T> resourceAssignments = new HashMap<I,T>();
protected ResourcePool(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser) {
this.sequenceDictionary = sequenceDictionary;
this.genomeLocParser = genomeLocParser;
}
/**
* Get an iterator whose position is before the specified location. Create a new one if none exists.
* @param segment Target position for the iterator.
@ -180,36 +196,11 @@ class MappedStreamSegment implements DataStreamSegment {
* Retrieves the first location covered by a mapped stream segment.
* @return Location of the first base in this segment.
*/
public GenomeLoc getFirstLocation() {
return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStart());
public GenomeLoc getLocation() {
return locus;
}
public MappedStreamSegment(GenomeLoc locus) {
this.locus = locus;
}
}
/**
* Models a position within the unmapped reads in a stream of GATK input data.
*/
class UnmappedStreamSegment implements DataStreamSegment {
/**
* Where does this region start, given 0 = the position of the first unmapped read.
*/
public final long position;
/**
* How many reads wide is this region? This size is generally treated as an upper bound.
*/
public final long size;
/**
* Create a new target location in an unmapped read stream.
* @param position The 0-based index into the unmapped reads. Position 0 represents the first unmapped read.
* @param size the size of the segment.
*/
public UnmappedStreamSegment( long position, long size ) {
this.position = position;
this.size = size;
}
}

View File

@ -43,6 +43,7 @@ import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -63,17 +64,22 @@ public class SAMDataSource implements SimpleDataSource {
/**
* Runtime metrics of reads filtered, etc.
*/
protected final ReadMetrics readMetrics;
private final ReadMetrics readMetrics;
/**
* Tools for parsing GenomeLocs, for verifying BAM ordering against general ordering.
*/
private final GenomeLocParser genomeLocParser;
/**
* Identifiers for the readers driving this data source.
*/
protected final List<SAMReaderID> readerIDs;
private final List<SAMReaderID> readerIDs;
/**
* How strict are the readers driving this data source.
*/
protected final SAMFileReader.ValidationStringency validationStringency;
private final SAMFileReader.ValidationStringency validationStringency;
/**
* How far along is each reader?
@ -113,9 +119,10 @@ public class SAMDataSource implements SimpleDataSource {
* Create a new SAM data source given the supplied read metadata.
* @param samFiles list of reads files.
*/
public SAMDataSource(List<SAMReaderID> samFiles) {
public SAMDataSource(List<SAMReaderID> samFiles,GenomeLocParser genomeLocParser) {
this(
samFiles,
genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
@ -145,6 +152,7 @@ public class SAMDataSource implements SimpleDataSource {
*/
public SAMDataSource(
List<SAMReaderID> samFiles,
GenomeLocParser genomeLocParser,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness,
Integer readBufferSize,
@ -155,6 +163,7 @@ public class SAMDataSource implements SimpleDataSource {
boolean generateExtendedEvents
) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
readerIDs = samFiles;
validationStringency = strictness;
@ -520,7 +529,7 @@ public class SAMDataSource implements SimpleDataSource {
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
// verify the read ordering by applying a sort order iterator
if (!noValidationOfReadOrder && enableVerification)
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));

View File

@ -53,9 +53,9 @@ public class LinearMicroScheduler extends MicroScheduler {
// New experimental code for managing locus intervals.
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards());
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards());
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods);
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler {
windowMaker.close();
}
else {
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,getReadIterator(shard),reference,rods);
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();

View File

@ -158,6 +158,12 @@ public abstract class MicroScheduler {
traversalEngine.printOnTraversalDone(metrics);
}
/**
* Gets the engine that created this microscheduler.
* @return The engine owning this microscheduler.
*/
public GenomeAnalysisEngine getEngine() { return engine; }
/**
* Returns data source maintained by this scheduler
* @return

View File

@ -61,11 +61,11 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards());
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards());
ShardDataProvider dataProvider = null;
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
dataProvider.close();
}

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
* Buffer shards of data which may or may not contain multiple loci into
@ -51,11 +52,11 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* @param intervals The set of intervals over which to traverse.
* @param discards a filter that indicates read position relative to some locus
*/
public WindowMaker(Shard shard, StingSAMIterator iterator, List<GenomeLoc> intervals, List<LocusIteratorFilter> discards ) {
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, List<LocusIteratorFilter> discards ) {
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,discards);
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,discards);
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;

View File

@ -22,6 +22,11 @@ import java.util.Iterator;
* Iterates through all of the loci provided in the reference.
*/
public class GenomeLocusIterator implements Iterator<GenomeLoc> {
/**
* Builds individual loci.
*/
private GenomeLocParser parser;
/**
* The entire region over which we're iterating.
*/
@ -38,9 +43,10 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
* @param completeLocus The entire region over which to iterate, one locus at a time.
*/
public GenomeLocusIterator( GenomeLoc completeLocus ) {
public GenomeLocusIterator( GenomeLocParser parser, GenomeLoc completeLocus ) {
this.parser = parser;
this.completeLocus = completeLocus;
this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
this.currentLocus = parser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
}
/**
@ -59,7 +65,7 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
if( !hasNext() )
throw new NoSuchElementException("No elements remaining in bounded reference region.");
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone();
currentLocus = GenomeLocParser.incPos(currentLocus);
currentLocus = parser.incPos(currentLocus);
return toReturn;
}

View File

@ -61,6 +61,11 @@ public class LocusIteratorByState extends LocusIterator {
// -----------------------------------------------------------------------------------------------------------------
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
/**
* Used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final Collection<String> sampleNames = new ArrayList<String>();
private final ReadStateManager readStates;
@ -129,8 +134,8 @@ public class LocusIteratorByState extends LocusIterator {
public int getGenomePosition() { return read.getAlignmentStart() + getGenomeOffset(); }
public GenomeLoc getLocation() {
return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
}
public CigarOperator getCurrentCigarOperator() {
@ -268,12 +273,13 @@ public class LocusIteratorByState extends LocusIterator {
// constructors and other basic operations
//
// -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation ) {
this(samIterator, readInformation, NO_FILTERS);
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser ) {
this(samIterator, readInformation, genomeLocParser, NO_FILTERS);
}
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, List<LocusIteratorFilter> filters ) {
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, List<LocusIteratorFilter> filters ) {
this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser;
this.filters = filters;
// Aggregate all sample names.
sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader()));
@ -310,7 +316,7 @@ public class LocusIteratorByState extends LocusIterator {
}
private GenomeLoc getLocation() {
return readStates.isEmpty() ? null : readStates.getFirst().getLocation();
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
}
// -----------------------------------------------------------------------------------------------------------------
@ -354,7 +360,7 @@ public class LocusIteratorByState extends LocusIterator {
SAMRecordState our1stState = readStates.getFirst();
// get current location on the reference and decrement it by 1: the indels we just stepped over
// are associated with the *previous* reference base
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
GenomeLoc loc = genomeLocParser.incPos(our1stState.getLocation(genomeLocParser),-1);
boolean hasBeenSampled = false;
for(String sampleName: sampleNames) {

View File

@ -16,11 +16,13 @@ import java.util.Iterator;
* To change this template use File | Settings | File Templates.
*/
public class VerifyingSamIterator implements StingSAMIterator {
private GenomeLocParser genomeLocParser;
StingSAMIterator it;
SAMRecord last = null;
boolean checkOrderP = true;
public VerifyingSamIterator(StingSAMIterator it) {
public VerifyingSamIterator(GenomeLocParser genomeLocParser,StingSAMIterator it) {
this.genomeLocParser = genomeLocParser;
this.it = it;
}
@ -35,27 +37,19 @@ public class VerifyingSamIterator implements StingSAMIterator {
return cur;
}
/**
* If true, enables ordered checking of the reads in the file. By default this is enabled.
* @param checkP If true, sam records will be checked to insure they come in order
*/
public void setCheckOrderP( boolean checkP ) {
checkOrderP = checkP;
}
public void verifyRecord( final SAMRecord last, final SAMRecord cur ) {
private void verifyRecord( final SAMRecord last, final SAMRecord cur ) {
if ( checkOrderP && isOutOfOrder(last, cur) ) {
this.last = null;
throw new RuntimeIOException(String.format("Reads are out of order:%nlast:%n%s%ncurrent:%n%s%n", last.format(), cur.format()) );
}
}
public static boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) {
private boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) {
if ( last == null || cur.getReadUnmappedFlag() )
return false;
else {
GenomeLoc lastLoc = GenomeLocParser.createGenomeLoc( last );
GenomeLoc curLoc = GenomeLocParser.createGenomeLoc( cur );
GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last );
GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur );
return curLoc.compareTo(lastLoc) == -1;
}
}

View File

@ -43,10 +43,15 @@ import java.util.TreeMap;
* a read-based meta data tracker
*/
public class ReadMetaDataTracker {
/**
* The parser, used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final SAMRecord record;
// the buffer of positions and RODs we've stored
private final TreeMap<Long, RODMetaDataContainer> mapping;
private final TreeMap<Integer, RODMetaDataContainer> mapping;
/**
* create a read meta data tracker, given the read and a queue of RODatum positions
@ -54,7 +59,8 @@ public class ReadMetaDataTracker {
* @param record the read to create offset from
* @param mapping the mapping of reference ordered datum
*/
public ReadMetaDataTracker(SAMRecord record, TreeMap<Long, RODMetaDataContainer> mapping) {
public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping) {
this.genomeLocParser = genomeLocParser;
this.record = record;
this.mapping = mapping;
}
@ -69,13 +75,13 @@ public class ReadMetaDataTracker {
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Long, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Long, RODMetaDataContainer> queue, Class cl, String name) {
private Map<Integer, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> queue, Class cl, String name) {
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
Map<Long, Collection<GATKFeature>> ret = new LinkedHashMap<Long, Collection<GATKFeature>>();
GenomeLoc location = GenomeLocParser.createGenomeLoc(record);
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
GenomeLoc location = genomeLocParser.createGenomeLoc(record);
int length = record.getReadLength();
for (Long loc : queue.keySet()) {
Long position = loc - location.getStart();
for (Integer loc : queue.keySet()) {
Integer position = loc - location.getStart();
if (position >= 0 && position < length) {
Collection<GATKFeature> set;
if (cl != null)
@ -95,11 +101,11 @@ public class ReadMetaDataTracker {
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Long, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Long, RODMetaDataContainer> mapping, Class cl, String name) {
Map<Long, Collection<GATKFeature>> ret = new LinkedHashMap<Long, Collection<GATKFeature>>();
private Map<Integer, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping, Class cl, String name) {
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
int start = record.getAlignmentStart();
int stop = record.getAlignmentEnd();
for (Long location : mapping.keySet()) {
for (Integer location : mapping.keySet()) {
if (location >= start && location <= stop)
if (cl != null)
ret.put(location, mapping.get(location).getSet(cl));
@ -114,7 +120,7 @@ public class ReadMetaDataTracker {
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Long, Collection<GATKFeature>> getReadOffsetMapping() {
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping() {
return createReadAlignment(record, mapping, null, null);
}
@ -123,7 +129,7 @@ public class ReadMetaDataTracker {
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Long, Collection<GATKFeature>> getContigOffsetMapping() {
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping() {
return createGenomeLocAlignment(record, mapping, null, null);
}
@ -132,7 +138,7 @@ public class ReadMetaDataTracker {
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Long, Collection<GATKFeature>> getReadOffsetMapping(String name) {
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(String name) {
return createReadAlignment(record, mapping, null, name);
}
@ -141,7 +147,7 @@ public class ReadMetaDataTracker {
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Long, Collection<GATKFeature>> getContigOffsetMapping(String name) {
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(String name) {
return createGenomeLocAlignment(record, mapping, null, name);
}
@ -150,7 +156,7 @@ public class ReadMetaDataTracker {
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Long, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
return createReadAlignment(record, mapping, cl, null);
}
@ -159,7 +165,7 @@ public class ReadMetaDataTracker {
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Long, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
return createGenomeLocAlignment(record, mapping, cl, null);
}
}

View File

@ -22,18 +22,21 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
package org.broadinstitute.sting.gatk.refdata;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
* A suite of utilities for working with the GenomeLocParser
* in the context of the sequence dictionary.
* An interface marking that a given Tribble feature/codec is actually dependent on context within the
* reference, rather than having a dependency only on the contig, start, and stop of the given feature.
* A HACK. Tribble should contain all the information in needs to decode the unqualified position of
* a feature.
*/
public class GenomeLocParserTestUtils {
public interface ReferenceDependentFeatureCodec<T extends org.broad.tribble.Feature> extends FeatureCodec<T> {
/**
* Clear out the sequence dictionary associated with
* the genomeloc creator.
* Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features.
* @param genomeLocParser The parser to supply.
*/
public static void clearSequenceDictionary() {
GenomeLocParser.clearRefContigOrdering();
}
public void setGenomeLocParser(GenomeLocParser genomeLocParser);
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
@ -38,21 +39,26 @@ import java.util.List;
* To change this template use File | Settings | File Templates.
*/
public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
/**
* The parser, used to construct new genome locs.
*/
private final GenomeLocParser parser;
private PushbackIterator<GATKFeature> it;
List<GATKFeature> records = null; // here we will keep a pile of records overlaping with current position; when we iterate
// and step out of record's scope, we purge it from the list
String name = null; // name of the ROD track wrapped by this iterator. Will be pulled from underlying iterator.
long curr_position = 0; // where the iterator is currently positioned on the genome
long max_position = 0; // the rightmost stop position of currently loaded records
int curr_contig = -1; // what contig the iterator is currently on
int curr_position = 0; // where the iterator is currently positioned on the genome
int max_position = 0; // the rightmost stop position of currently loaded records
String curr_contig = null; // what contig the iterator is currently on
boolean next_is_allowed = true; // see discussion below. next() is illegal after seek-forward queries of length > 1
// the stop position of the last query. We can query only in forward direction ("seek forward");
// it is not only the start position of every successive query that can not be before the start
// of the previous one (curr_start), but it is also illegal for a query interval to *end* before
// the end of previous query, otherwise we can end up in an inconsistent state
long curr_query_end = -1;
int curr_query_end = -1;
// EXAMPLE of inconsistency curr_query_end guards against:
// record 1 record 2
@ -80,7 +86,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
// but re-enables next() again after a length-1 query.
public SeekableRODIterator(CloseableIterator<GATKFeature> it) {
public SeekableRODIterator(SAMSequenceDictionary dictionary,GenomeLocParser parser,CloseableIterator<GATKFeature> it) {
this.parser = parser;
this.it = new PushbackIterator<GATKFeature>(it);
records = new LinkedList<GATKFeature>();
// the following is a trick: we would like the iterator to know the actual name assigned to
@ -91,6 +98,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
GATKFeature r = null;
if (this.it.hasNext()) r = this.it.element();
name = (r==null?null:r.getName());
curr_contig = dictionary.getSequence(0).getSequenceName();
}
/**
@ -111,14 +120,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
// Returns point location (i.e. genome loc of length 1) on the reference, to which this iterator will advance
// upon next call to next().
public GenomeLoc peekNextLocation() {
if ( curr_position + 1 <= max_position ) return GenomeLocParser.createGenomeLoc(curr_contig,curr_position+1);
if ( curr_position + 1 <= max_position ) return parser.createGenomeLoc(curr_contig,curr_position+1);
// sorry, next reference position is not covered by the RODs we are currently holding. In this case,
// the location we will jump to upon next call to next() is the start of the next ROD record that we did
// not read yet:
if ( it.hasNext() ) {
GATKFeature r = it.element(); // peek, do not load!
return GenomeLocParser.createGenomeLoc(r.getLocation().getContigIndex(),r.getLocation().getStart());
return parser.createGenomeLoc(r.getLocation().getContig(),r.getLocation().getStart());
}
return null; // underlying iterator has no more records, there is no next location!
}
@ -147,7 +156,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
records.clear();
GATKFeature r = it.next(); // if hasNext() previously returned true, we are guaranteed that this call to reader.next() is safe
records.add( r );
curr_contig = r.getLocation().getContigIndex();
curr_contig = r.getLocation().getContig();
curr_position = r.getLocation().getStart();
max_position = r.getLocation().getStop();
}
@ -163,11 +172,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
it.next();
continue;
}
int that_contig = r.getLocation().getContigIndex();
if ( curr_contig > that_contig )
GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
GenomeLoc thatContig = r.getLocation();
if ( currentContig.isPast(thatContig) )
throw new UserException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() +
" occurs out of order in track " + r.getName() );
if ( curr_contig < that_contig ) break; // next record is on a higher contig, we do not need it yet...
if ( currentContig.isBefore(thatContig) ) break; // next record is on a higher contig, we do not need it yet...
if ( r.getLocation().getStart() < curr_position )
throw new UserException("LocationAwareSeekableRODIterator: track "+r.getName() +
@ -177,7 +189,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
r = it.next(); // we got here only if we do need next record, time to load it for real
long stop = r.getLocation().getStop();
int stop = r.getLocation().getStop();
if ( stop < curr_position ) throw new ReviewedStingException("DEBUG: encountered contig that should have been loaded earlier"); // this should never happen
if ( stop > max_position ) max_position = stop; // max_position keeps the rightmost stop position across all loaded records
records.add(r);
@ -186,7 +198,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
// 'records' and current position are fully updated. Last, we need to set the location of the whole track
// (collection of ROD records) to the genomic site we are currently looking at, and return the list
return new RODRecordListImpl(name,records, GenomeLocParser.createGenomeLoc(curr_contig,curr_position));
return new RODRecordListImpl(name,records, parser.createGenomeLoc(curr_contig,curr_position));
}
/**
@ -218,13 +230,13 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
* @return Current ending position of the iterator, or null if no position exists.
*/
public GenomeLoc position() {
if ( curr_contig < 0 ) return null;
if ( curr_contig == null ) return null;
if ( curr_query_end > curr_position ) {
// do not attempt to reuse this iterator if the position we need it for lies before the end of last query performed
return GenomeLocParser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end);
return parser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end);
}
else {
return GenomeLocParser.createGenomeLoc(curr_contig,curr_position);
return parser.createGenomeLoc(curr_contig,curr_position);
}
}
@ -256,10 +268,11 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
*/
public RODRecordList seekForward(GenomeLoc interval) {
if ( interval.getContigIndex() < curr_contig )
if ( interval.isBefore(parser.createOverEntireContig(curr_contig)) &&
!(interval.getStart() == 0 && interval.getStop() == 0 && interval.getContig().equals(curr_contig)) ) // This criteria is syntactic sugar for 'seek to right before curr_contig'
throw new ReviewedStingException("Out of order query: query contig "+interval.getContig()+" is located before "+
"the iterator's current contig");
if ( interval.getContigIndex() == curr_contig ) {
if ( interval.getContig().equals(curr_contig) ) {
if ( interval.getStart() < curr_position )
throw new ReviewedStingException("Out of order query: query position "+interval +" is located before "+
"the iterator's current position "+curr_contig + ":" + curr_position);
@ -273,7 +286,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
next_is_allowed = ( curr_position == curr_query_end ); // we can call next() later only if interval length is 1
if ( interval.getContigIndex() == curr_contig && curr_position <= max_position ) {
if ( interval.getContig().equals(curr_contig) && curr_position <= max_position ) {
// some of the intervals we are currently keeping do overlap with the query interval
purgeOutOfScopeRecords();
@ -281,7 +294,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
// clean up and get ready for fast-forwarding towards the requested position
records.clear();
max_position = -1;
curr_contig = interval.getContigIndex();
curr_contig = interval.getContig();
}
// curr_contig and curr_position are set to where we asked to scroll to
@ -289,10 +302,12 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
while ( it.hasNext() ) {
GATKFeature r = it.next();
if ( r == null ) continue;
int that_contig = r.getLocation().getContigIndex();
if ( curr_contig > that_contig ) continue; // did not reach requested contig yet
if ( curr_contig < that_contig ) {
GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
GenomeLoc thatContig = r.getLocation();
if ( currentContig.isPast(thatContig) ) continue; // did not reach requested contig yet
if ( currentContig.isBefore(thatContig) ) {
it.pushback(r); // next record is on the higher contig, we do not need it yet...
break;
}
@ -340,4 +355,5 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
public void close() {
if (this.it != null) ((CloseableIterator)this.it.getUnderlyingIterator()).close();
}
}

View File

@ -181,7 +181,7 @@ public class VariantContextAdaptors {
// add the call to the genotype list, and then use this list to create a VariantContext
genotypes.add(call);
alleles.add(refAllele);
VariantContext vc = VariantContextUtils.toVC(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
VariantContext vc = VariantContextUtils.toVC(name, ref.getGenomeLocParser().createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
return vc;
} else
return null; // can't handle anything else

View File

@ -33,15 +33,15 @@ import java.util.StringTokenizer;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTableFeature> {
public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<AnnotatorInputTableFeature> {
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
@ -49,6 +49,19 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
private ArrayList<String> header;
/**
* The parser to use when resolving genome-wide locations.
*/
private GenomeLocParser genomeLocParser;
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
*/
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
/**
* Parses the header.
*
@ -80,9 +93,9 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
GenomeLoc loc;
String chr = st.nextToken();
if ( chr.indexOf(":") != -1 )
loc = GenomeLocParser.parseGenomeInterval(chr);
loc = genomeLocParser.parseGenomeInterval(chr);
else
loc = GenomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
return new AnnotatorInputTableFeature(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
}
@ -107,9 +120,9 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
GenomeLoc loc;
if ( values.get(0).indexOf(":") != -1 )
loc = GenomeLocParser.parseGenomeInterval(values.get(0));
loc = genomeLocParser.parseGenomeInterval(values.get(0));
else
loc = GenomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
//parse the location
feature.setChr(loc.getContig());

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import java.io.File;
@ -37,10 +36,11 @@ import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
public class BeagleCodec implements FeatureCodec<BeagleFeature> {
public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature> {
private String[] header;
public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2};
private BeagleReaderType readerType;
@ -52,6 +52,19 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
private static final String delimiterRegex = "\\s+";
/**
* The parser to use when resolving genome-wide locations.
*/
private GenomeLocParser genomeLocParser;
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
*/
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
public Feature decodeLoc(String line) {
return decode(line);
}
@ -147,17 +160,6 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)");
private static GenomeLoc parseMarkerName(String markerName) {
Matcher m = MARKER_PATTERN.matcher(markerName);
if ( m.matches() ) {
String contig = m.group(1);
long start = Long.valueOf(m.group(2));
return GenomeLocParser.createGenomeLoc(contig, start, start);
} else {
throw new IllegalArgumentException("Malformatted marker string: " + markerName + " required format is chrN:position");
}
}
@Override
public Class<BeagleFeature> getFeatureType() {
return BeagleFeature.class;
@ -175,7 +177,7 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
BeagleFeature bglFeature = new BeagleFeature();
final GenomeLoc loc = GenomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this
//parse the location: common to all readers
bglFeature.setChr(loc.getContig());

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.refdata.features.refseq;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -13,7 +13,21 @@ import java.util.ArrayList;
/**
* the ref seq codec
*/
public class RefSeqCodec implements FeatureCodec {
public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> {
/**
* The parser to use when resolving genome-wide locations.
*/
private GenomeLocParser genomeLocParser;
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
*/
@Override
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
@Override
public Feature decodeLoc(String line) {
@ -21,19 +35,19 @@ public class RefSeqCodec implements FeatureCodec {
String fields[] = line.split("\t");
if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length);
String contig_name = fields[2];
return new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
}
/** Fills this object from a text line in RefSeq (UCSC) text dump file */
@Override
public Feature decode(String line) {
public RefSeqFeature decode(String line) {
if (line.startsWith("#")) return null;
String fields[] = line.split("\t");
// we reference postion 15 in the split array below, make sure we have at least that many columns
if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length);
String contig_name = fields[2];
RefSeqFeature feature = new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
feature.setTranscript_id(fields[1]);
if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1);
@ -41,8 +55,8 @@ public class RefSeqCodec implements FeatureCodec {
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
feature.setTranscript_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
feature.setTranscript_coding_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
feature.setTranscript_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
feature.setTranscript_coding_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
feature.setGene_name(fields[12]);
String[] exon_starts = fields[9].split(",");
String[] exon_stops = fields[10].split(",");
@ -57,7 +71,7 @@ public class RefSeqCodec implements FeatureCodec {
ArrayList<Integer> exon_frames = new ArrayList<Integer>(eframes.length);
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
exons.add(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
exon_frames.add(Integer.decode(eframes[i]));
}

View File

@ -25,15 +25,8 @@ public class RefSeqFeature implements Transcript, Feature {
private List<Integer> exon_frames;
private String name;
// store the contig, start, and stop for this record
private final String contig;
private final int start;
private final int stop;
public RefSeqFeature(String contig, int start, int stop) {
this.contig = contig;
this.start = start;
this.stop = stop;
public RefSeqFeature(GenomeLoc genomeLoc) {
this.transcript_interval = genomeLoc;
}
/** Returns id of the transcript (RefSeq NM_* id) */
@ -44,8 +37,6 @@ public class RefSeqFeature implements Transcript, Feature {
/** Returns transcript's full genomic interval (includes all exons with UTRs) */
public GenomeLoc getLocation() {
if (transcript_interval == null)
transcript_interval = GenomeLocParser.parseGenomeLoc(contig,start,stop);
return transcript_interval;
}
@ -270,16 +261,16 @@ public class RefSeqFeature implements Transcript, Feature {
@Override
public String getChr() {
return contig;
return transcript_interval.getContig();
}
@Override
public int getStart() {
return start;
return transcript_interval.getStart();
}
@Override
public int getEnd() {
return stop;
return transcript_interval.getStop();
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata.features.table;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -14,12 +15,27 @@ import java.util.*;
/**
* implementation of a simple table (tab or comma delimited format) input files... more improvements to come
*/
public class TableCodec implements FeatureCodec {
public class TableCodec implements ReferenceDependentFeatureCodec {
private String delimiterRegex = "\\s+";
private String headerDelimiter = "HEADER";
private String commentDelimiter = "#";
private ArrayList<String> header = new ArrayList<String>();
/**
* The parser to use when resolving genome-wide locations.
*/
private GenomeLocParser genomeLocParser;
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
*/
@Override
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
@Override
public Feature decodeLoc(String line) {
return decode(line);
@ -34,7 +50,7 @@ public class TableCodec implements FeatureCodec {
throw new IllegalArgumentException("TableCodec line = " + line + " doesn't appear to be a valid table format");
return new TableFeature(GenomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
}
@Override

View File

@ -30,6 +30,7 @@ import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
@ -59,6 +60,11 @@ public class RMDTrack {
// our sequence dictionary, which can be null
private final SAMSequenceDictionary dictionary;
/**
* Parser to use when creating/parsing GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
// our codec type
private final FeatureCodec codec;
@ -101,13 +107,14 @@ public class RMDTrack {
* @param dict the sam sequence dictionary
* @param codec the feature codec we use to decode this type
*/
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) {
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
this.type = type;
this.recordType = codec.getFeatureType();
this.name = name;
this.file = file;
this.reader = reader;
this.dictionary = dict;
this.genomeLocParser = genomeLocParser;
this.codec = codec;
}
@ -117,7 +124,7 @@ public class RMDTrack {
*/
public CloseableIterator<GATKFeature> getIterator() {
try {
return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.iterator(),this.getName());
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(getFile(), "Unable to read from file", e);
}
@ -133,19 +140,19 @@ public class RMDTrack {
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
}
public void close() {

View File

@ -35,11 +35,13 @@ import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.source.CachingFeatureSource;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.AbstractGenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.classloader.PluginManager;
@ -80,18 +82,34 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
// private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null;
/**
* Private genome loc parser to use when building out new locs.
*/
private GenomeLocParser genomeLocParser;
/** Create a new plugin manager. */
public RMDTrackBuilder() {
super(FeatureCodec.class, "Codecs", "Codec");
}
/**
* Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined.
* @param dict
* @param genomeLocParser
*/
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) {
super(FeatureCodec.class, "Codecs", "Codec");
setSequenceDictionary(dict,genomeLocParser);
}
/**
*
* @param dict the sequence dictionary to use as a reference for Tribble track contig length lookups
*/
public void setSequenceDictionary(SAMSequenceDictionary dict) {
public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) {
this.dict = dict;
}
this.genomeLocParser = genomeLocParser;
}
/** @return a list of all available track types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes() {
@ -115,6 +133,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
/**
* create a RMDTrack of the specified type
*
* @param genomeLocParser GenomeLocParser to use, if case track needs additional reference context.
* @param targetClass the target class of track
* @param name what to call the track
* @param inputFile the input file
@ -127,7 +146,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
// return a feature reader track
Pair<FeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name));
return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(targetClass,name));
}
/**
@ -186,6 +205,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
FeatureCodec codex = this.createByType(targetClass);
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
if(codex instanceof ReferenceDependentFeatureCodec)
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
return codex;
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.Feature;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Iterator;
@ -39,10 +40,12 @@ import java.util.Iterator;
* a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs)
*/
public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeature> {
private final GenomeLocParser genomeLocParser;
private final CloseableTribbleIterator<Feature> iterator;
private final String name;
public FeatureToGATKFeatureIterator(CloseableTribbleIterator<Feature> iter, String name) {
public FeatureToGATKFeatureIterator(GenomeLocParser genomeLocParser,CloseableTribbleIterator<Feature> iter, String name) {
this.genomeLocParser = genomeLocParser;
this.name = name;
this.iterator = iter;
}
@ -54,7 +57,7 @@ public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeatu
@Override
public GATKFeature next() {
return new GATKFeature.TribbleGATKFeature(iterator.next(),name);
return new GATKFeature.TribbleGATKFeature(genomeLocParser,iterator.next(),name);
}
@Override

View File

@ -62,15 +62,17 @@ public abstract class GATKFeature implements Feature {
* wrapping a Tribble feature in a GATK friendly interface
*/
public static class TribbleGATKFeature extends GATKFeature {
private final GenomeLocParser genomeLocParser;
private final Feature feature;
private GenomeLoc position = null;
public TribbleGATKFeature(Feature f, String name) {
public TribbleGATKFeature(GenomeLocParser genomeLocParser,Feature f, String name) {
super(name);
this.genomeLocParser = genomeLocParser;
feature = f;
}
public GenomeLoc getLocation() {
if (position == null) position = GenomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
if (position == null) position = genomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
return position;
}

View File

@ -48,19 +48,22 @@ import java.util.Iterator;
* to parse a string.
*/
public class StringToGenomeLocIteratorAdapter implements Iterator<GenomeLoc> {
private GenomeLocParser genomeLocParser;
private PushbackIterator<String> it = null;
public enum FORMAT { BED, GATK };
FORMAT myFormat = FORMAT.GATK;
public StringToGenomeLocIteratorAdapter(Iterator<String> it, FORMAT format) {
public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator<String> it, FORMAT format) {
this.genomeLocParser = genomeLocParser;
this.it = new PushbackIterator<String>(it);
myFormat = format;
}
public StringToGenomeLocIteratorAdapter(Iterator<String> it ) {
this(it,FORMAT.GATK);
public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator<String> it ) {
this(genomeLocParser,it,FORMAT.GATK);
}
public boolean hasNext() {
@ -81,8 +84,8 @@ public class StringToGenomeLocIteratorAdapter implements Iterator<GenomeLoc> {
public GenomeLoc next() {
if ( myFormat == FORMAT.GATK ) return GenomeLocParser.parseGenomeInterval( it.next() );
return BedParser.parseLocation( it.next() );
if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeInterval( it.next() );
return BedParser.parseLocation( genomeLocParser,it.next() );
}
public void remove() {

View File

@ -48,7 +48,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
private GenomeAnalysisEngine engine;
protected GenomeAnalysisEngine engine;
/**
* Gets the named traversal type associated with the given traversal.
@ -74,7 +74,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
public void printProgress(Shard shard,GenomeLoc loc) {
// A bypass is inserted here for unit testing.
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats.
ReadMetrics cumulativeMetrics = engine != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
printProgress(loc, cumulativeMetrics, false);
}

View File

@ -59,12 +59,12 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
}
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
l.add(read);
for (SAMRecord read2 : iter) {
GenomeLoc site2 = GenomeLocParser.createGenomeLoc(read2);
GenomeLoc site2 = engine.getGenomeLocParser().createGenomeLoc(read2);
// the next read starts too late
if (site2.getStart() != site.getStart()) {
@ -114,7 +114,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) {
if ( read.getReadPairedFlag() ) {
// paired
final GenomeLoc readMateLoc = GenomeLocParser.createGenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
for (List<SAMRecord> reads : readSets) {
SAMRecord key = reads.get(0);
@ -123,7 +123,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
// share a mate location or the read is flagged as a duplicate
if ( read.getAlignmentStart() == key.getAlignmentStart() && key.getReadPairedFlag() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) ) {
// at least one has to be marked as a duplicate
final GenomeLoc keyMateLoc = GenomeLocParser.createGenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
final GenomeLoc keyMateLoc = engine.getGenomeLocParser().createGenomeLoc(key.getMateReferenceName(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
if ( readMateLoc.compareTo(keyMateLoc) == 0 ) {
// we are at the same position as the dup and have the same mat pos, it's a dup
if (DEBUG) logger.debug(String.format(" => Adding read to dups list: %s %d %s vs. %s", read, reads.size(), readMateLoc, keyMateLoc));
@ -176,7 +176,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
*/
for (SAMRecord read : iter) {
// get the genome loc from the read
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter));
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));

View File

@ -57,7 +57,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
// associated with the current site), we need to update the location. The updated location still starts
// at the current genomic position, but it has to span the length of the longest deletion (if any).
location = GenomeLocParser.setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that

View File

@ -102,7 +102,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
sum = walker.reduce(x, sum);
}
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart());
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
printProgress(dataProvider.getShard(),locus);
}
return sum;

View File

@ -225,7 +225,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
vcfWriter.add(annotatedVC, ref.getBase());
} else {
// check to see if the buffered context is different (in location) this context
if ( indelBufferContext != null && ! VariantContextUtils.getLocation(indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(annotatedVCs.iterator().next())) ) {
if ( indelBufferContext != null && ! VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) {
for ( VariantContext annotatedVC : indelBufferContext )
vcfWriter.add(annotatedVC, ref.getBase());
indelBufferContext = annotatedVCs;

View File

@ -165,10 +165,10 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
}
public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
GenomeLoc currentLoc = VariantContextUtils.getLocation(preferredVC);
GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC);
beagleWriter.print(String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart()));
if ( beagleGenotypesWriter != null ) {
beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString()));
beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC).toString()));
}
for ( Allele allele : preferredVC.getAlleles() ) {

View File

@ -100,10 +100,12 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
}
public static class CallableBaseState {
public GenomeLocParser genomeLocParser;
public GenomeLoc loc;
public CalledState state;
public CallableBaseState(GenomeLoc loc, CalledState state) {
public CallableBaseState(GenomeLocParser genomeLocParser,GenomeLoc loc, CalledState state) {
this.genomeLocParser = genomeLocParser;
this.loc = loc;
this.state = state;
}
@ -126,7 +128,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
* @param newStop
*/
public void update(GenomeLoc newStop) {
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), newStop.getStop());
loc = genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), newStop.getStop());
}
public String toString() {
@ -174,7 +176,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
}
}
return new CallableBaseState(context.getLocation(), state);
return new CallableBaseState(getToolkit().getGenomeLocParser(),context.getLocation(), state);
}
public Integrator reduce(CallableBaseState state, Integrator integrator) {

View File

@ -98,9 +98,9 @@ public class CompareCallableLociWalker extends RodWalker<List<CallableLociWalker
}
FullBEDFeature bed = (FullBEDFeature)bindings.get(0);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd());
GenomeLoc loc = getToolkit().getGenomeLocParser().createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd());
CallableLociWalker.CalledState state = CallableLociWalker.CalledState.valueOf(bed.getName());
return new CallableLociWalker.CallableBaseState(loc, state);
return new CallableLociWalker.CallableBaseState(getToolkit().getGenomeLocParser(),loc, state);
}
// --------------------------------------------------------------------------------------------------------------

View File

@ -406,7 +406,10 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
RMDTrackBuilder builder = new RMDTrackBuilder();
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
try {
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
return new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),
refseq.iterator(),"refseq"));
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(refSeqGeneList, "Unable to open file", e);
}

View File

@ -81,7 +81,7 @@ public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, Gen
}
// otherwise, merge them
else {
sum = GenomeLocParser.setStop(sum, value.first.getStop());
sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop());
fasta.append(value.second);
}
return sum;

View File

@ -2,13 +2,16 @@ package org.broadinstitute.sting.gatk.walkers.filters;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
public class ClusteredSnps {
private GenomeLocParser genomeLocParser;
private int window = 10;
private int snpThreshold = 3;
public ClusteredSnps(int snpThreshold, int window) {
public ClusteredSnps(GenomeLocParser genomeLocParser,int snpThreshold, int window) {
this.genomeLocParser = genomeLocParser;
this.window = window;
this.snpThreshold = snpThreshold;
if ( window < 1 || snpThreshold < 1 )
@ -29,7 +32,7 @@ public class ClusteredSnps {
throw new UserException.BadInput("The clustered SNPs filter does not work in the presence of non-variant records; see the documentation for more details");
// find the nth variant
GenomeLoc left = VariantContextUtils.getLocation(variants[i].getVariantContext());
GenomeLoc left = VariantContextUtils.getLocation(genomeLocParser,variants[i].getVariantContext());
GenomeLoc right = null;
int snpsSeen = 1;
@ -37,7 +40,7 @@ public class ClusteredSnps {
while ( ++currentIndex < variants.length ) {
if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) {
if ( ++snpsSeen == snpThreshold ) {
right = VariantContextUtils.getLocation(variants[currentIndex].getVariantContext());
right = VariantContextUtils.getLocation(genomeLocParser,variants[currentIndex].getVariantContext());
break;
}
}

View File

@ -117,7 +117,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
public void initialize() {
if ( clusterWindow > 0 )
clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow);
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow);
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
@ -188,7 +188,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
if ( VariantContextUtils.match(vc, g, exp) )
if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, g, exp) )
filters.add(exp.name);
}
genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased()));
@ -211,7 +211,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
filters.add(CLUSTERED_SNP_FILTER_NAME);
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
if ( VariantContextUtils.match(vc, exp) )
if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, exp) )
filters.add(exp.name);
}

View File

@ -133,7 +133,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
}
// merge the variant contexts
return VariantContextUtils.simpleMerge(calls, ref.getBase());
return VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), calls, ref.getBase());
}
public static AlignmentContext filterForSamples(ReadBackedPileup pileup, Set<String> samples) {

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.contexts.*;
import java.util.*;
public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
private final GenomeLocParser genomeLocParser;
private int MIN_COVERAGE = 6;
private double MIN_FRACTION = 0.3;
@ -20,7 +21,9 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
// the previous normal event context
// private Map<String, StratifiedAlignmentContext> cachedContext;
protected SimpleIndelCalculationModel() {}
protected SimpleIndelCalculationModel(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
private int totalIndels = 0;
private int totalCoverage = 0;
@ -70,7 +73,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
if ( bestEvent.charAt(0) == '-' ) {
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
alleles.add( Allele.create(bestEvent.substring(1), true ));
loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1);
loc = genomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1);
} else
throw new ReviewedStingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent);
}

View File

@ -239,7 +239,9 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq"));
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e);
}
@ -257,7 +259,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
int nNorm = 0;
int nTum = 0;
for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) {
for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
List<String> tags = rid.getTags() ;
if ( tags.isEmpty() && call_somatic )
throw new UserException.BadInput("In somatic mode all input bam files must be tagged as either 'normal' or 'tumor'. Untagged file: "+
@ -297,12 +299,12 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) {
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
genotypeIntervals = new IntervalFileMergingIterator(
genotypeIntervals = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(),
new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
} else {
// read in the whole list of intervals for cleaning
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(
IntervalUtils.parseIntervalArguments(Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
genotypeIntervals = locs.iterator();
}
currentGenotypeInterval = genotypeIntervals.hasNext() ? genotypeIntervals.next() : null;
@ -310,7 +312,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
}
location = GenomeLocParser.createGenomeLoc(0,1);
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
// List<Set<String>> readGroupSets = getToolkit().getMergedReadGroupsByReaders();
// List<Set<String>> sampleSets = getToolkit().getSamplesByReaders();
@ -387,8 +389,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
currentPosition = read.getAlignmentStart();
refName = new String(read.getReferenceName());
location = GenomeLocParser.setContig(location,refName);
contigLength = GenomeLocParser.getContigInfo(refName).getSequenceLength();
location = getToolkit().getGenomeLocParser().createGenomeLoc(refName,location.getStart(),location.getStop());
contigLength = getToolkit().getGenomeLocParser().getContigInfo(refName).getSequenceLength();
outOfContigUserWarned = false;
normal_context.clear(); // reset coverage window; this will also set reference position to 0
@ -543,7 +545,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
}
long move_to = adjustedPosition;
for ( long pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) {
for ( int pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) {
if ( normal_context.indelsAt(pos).size() == 0 ) continue; // no indels
@ -579,8 +581,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
// if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right:
if ( right > normal_context.getStop() ) right = normal_context.getStop();
location = GenomeLocParser.setStart(location,pos);
location = GenomeLocParser.setStop(location,pos); // retrieve annotation data
location = getToolkit().getGenomeLocParser().setStart(location,pos);
location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
if ( normalCall.isCall() ) {
normalCallsMade++;
@ -692,7 +694,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
if ( DEBUG ) System.out.println("DEBUG>> Emitting in somatic mode up to "+position+" force shift="+force+" current window="+tumor_context.getStart()+"-"+tumor_context.getStop());
for ( long pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) {
for ( int pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) {
if ( tumor_context.indelsAt(pos).size() == 0 ) continue; // no indels in tumor
@ -735,8 +737,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
location = GenomeLocParser.setStart(location,pos);
location = GenomeLocParser.setStop(location,pos); // retrieve annotation data
location = getToolkit().getGenomeLocParser().setStart(location,pos);
location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
if ( tumorCall.isCall() ) {
tumorCallsMade++;
@ -1395,13 +1397,13 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
class WindowContext implements IndelListener {
private Set<ExpandedSAMRecord> reads;
private long start=0; // where the window starts on the ref, 1-based
private int start=0; // where the window starts on the ref, 1-based
private CircularArray< List< IndelVariant > > indels;
private List<IndelVariant> emptyIndelList = new ArrayList<IndelVariant>();
public WindowContext(long start, int length) {
public WindowContext(int start, int length) {
this.start = start;
indels = new CircularArray< List<IndelVariant> >(length);
// reads = new LinkedList<SAMRecord>();
@ -1412,13 +1414,13 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
*
* @return
*/
public long getStart() { return start; }
public int getStart() { return start; }
/** Returns 1-based reference stop position (inclusive) of the interval this object keeps context for.
*
* @return
*/
public long getStop() { return start + indels.length() - 1; }
public int getStop() { return start + indels.length() - 1; }
/** Resets reference start position to 0 and clears the context.
*

View File

@ -211,9 +211,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
for (String fileOrInterval : intervalsFile.split(";")) {
// if it's a file, add items to raw interval list
if (IntervalUtils.isIntervalFile(fileOrInterval)) {
merger.add(new IntervalFileMergingIterator( new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) );
merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) );
} else {
rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval));
rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeInterval(fileOrInterval));
}
}
if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator());
@ -221,7 +221,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
intervals = merger;
} else {
// read in the whole list of intervals for cleaning
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY);
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY);
intervals = locs.iterator();
}
currentInterval = intervals.hasNext() ? intervals.next() : null;
@ -239,9 +239,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
nwayWriters = new HashMap<SAMReaderID,SAMFileWriter>();
for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) {
for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
String fName = getToolkit().getDataSource().getSAMFile(rid).getName();
String fName = getToolkit().getReadsDataSource().getSAMFile(rid).getName();
int pos ;
if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
@ -383,10 +383,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
return 0;
}
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(read);
// hack to get around unmapped reads having screwy locations
if ( readLoc.getStop() == 0 )
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart());
readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) ) {
// TODO -- it would be nice if we could use indels from 454 reads as alternate consenses
@ -1414,7 +1414,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
}
private static class ReadBin {
private class ReadBin {
private final ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
private byte[] reference = null;
@ -1426,11 +1426,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// This can happen if e.g. there's a large known indel with no overlapping reads.
public void add(SAMRecord read) {
GenomeLoc locForRead = GenomeLocParser.createGenomeLoc(read);
GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read);
if ( loc == null )
loc = locForRead;
else if ( locForRead.getStop() > loc.getStop() )
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), locForRead.getStop());
loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), loc.getStart(), locForRead.getStop());
reads.add(read);
}
@ -1441,9 +1441,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// set up the reference if we haven't done so yet
if ( reference == null ) {
// first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
long padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1);
long padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), padLeft, padRight);
int padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1);
int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight);
reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
StringUtil.toUpperCase(reference);
}

View File

@ -92,7 +92,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
boolean hasInsertion = false;
boolean hasPointEvent = false;
long furthestStopPos = -1;
int furthestStopPos = -1;
// look for insertions in the extended context (we'll get deletions from the normal context)
if ( context.hasExtendedEventPileup() ) {
@ -175,9 +175,9 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
GenomeLoc eventLoc = context.getLocation();
if ( hasInsertion )
eventLoc = GenomeLocParser.createGenomeLoc(eventLoc.getContigIndex(), eventLoc.getStart(), eventLoc.getStart()+1);
eventLoc = getToolkit().getGenomeLocParser().createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), eventLoc.getStart()+1);
else if ( hasIndel && !context.hasBasePileup() )
eventLoc = GenomeLocParser.createGenomeLoc(eventLoc.getContigIndex(), eventLoc.getStart(), furthestStopPos);
eventLoc = getToolkit().getGenomeLocParser().createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), furthestStopPos);
EVENT_TYPE eventType = (hasIndel ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT) : EVENT_TYPE.POINT_EVENT);
@ -217,15 +217,15 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
private enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }
class Event {
public long furthestStopPos;
public int furthestStopPos;
public GenomeLoc loc;
public long eventStartPos;
private long eventStopPos;
public int eventStartPos;
private int eventStopPos;
private EVENT_TYPE type;
private ArrayList<Long> pointEvents = new ArrayList<Long>();
private ArrayList<Integer> pointEvents = new ArrayList<Integer>();
public Event(GenomeLoc loc, long furthestStopPos, EVENT_TYPE type) {
public Event(GenomeLoc loc, int furthestStopPos, EVENT_TYPE type) {
this.loc = loc;
this.furthestStopPos = furthestStopPos;
this.type = type;
@ -254,9 +254,9 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
}
if ( e.type == EVENT_TYPE.POINT_EVENT || e.type == EVENT_TYPE.BOTH ) {
long newPosition = e.pointEvents.get(0);
int newPosition = e.pointEvents.get(0);
if ( pointEvents.size() > 0 ) {
long lastPosition = pointEvents.get(pointEvents.size()-1);
int lastPosition = pointEvents.get(pointEvents.size()-1);
if ( newPosition - lastPosition < windowSize ) {
eventStopPos = Math.max(eventStopPos, newPosition);
furthestStopPos = e.furthestStopPos;
@ -272,7 +272,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
}
public boolean isReportableEvent() {
return GenomeLocParser.validGenomeLoc(loc.getContig(), eventStartPos, eventStopPos) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
return getToolkit().getGenomeLocParser().validGenomeLoc(loc.getContig(), eventStartPos, eventStopPos) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
}
public String toString() {

View File

@ -33,6 +33,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -44,6 +45,8 @@ import java.util.*;
public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWriter {
private VCFWriter innerWriter;
private GenomeLocParser genomeLocParser;
private ReferenceSequenceFile referenceFileForMNPmerging;
private int maxGenomicDistanceForMNP;
@ -64,8 +67,9 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
// Should we call innerWriter.close() in close()
private boolean takeOwnershipOfInner;
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
this.innerWriter = innerWriter;
this.genomeLocParser = genomeLocParser;
this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile);
this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP;
this.useSingleSample = singleSample;
@ -83,8 +87,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
this.takeOwnershipOfInner = takeOwnershipOfInner;
}
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) {
this(innerWriter, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) {
this(innerWriter, genomeLocParser, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics
}
public void writeHeader(VCFHeader header) {
@ -113,7 +117,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
return;
}
logger.debug("Next VC input = " + VariantContextUtils.getLocation(vc));
logger.debug("Next VC input = " + VariantContextUtils.getLocation(genomeLocParser,vc));
boolean curVcIsNotFiltered = vc.isNotFiltered();
if (vcfrWaitingToMerge == null) {
@ -123,20 +127,20 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!");
if (curVcIsNotFiltered) { // still need to wait before can release vc
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(vc));
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser,vc));
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
}
else if (!emitOnlyMergedRecords) { // filtered records are never merged
logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(vc));
logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser,vc));
innerWriter.add(vc, refBase);
}
}
else { // waiting to merge vcfrWaitingToMerge
logger.debug("Waiting to merge " + VariantContextUtils.getLocation(vcfrWaitingToMerge.vc));
logger.debug("Waiting to merge " + VariantContextUtils.getLocation(genomeLocParser,vcfrWaitingToMerge.vc));
if (!curVcIsNotFiltered) {
if (!emitOnlyMergedRecords) { // filtered records are never merged
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(vc));
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser,vc));
filteredVcfrList.add(new VCFRecord(vc, refBase, false));
}
}
@ -164,7 +168,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
boolean mergedRecords = false;
if (mergeDistanceInRange) {
numRecordsWithinDistance++;
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging);
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser,vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging);
if (mergedVc != null) {
mergedRecords = true;
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
@ -209,8 +213,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
return numMergedRecords;
}
public static int minDistance(VariantContext vc1, VariantContext vc2) {
return VariantContextUtils.getLocation(vc1).minDistance(VariantContextUtils.getLocation(vc2));
public int minDistance(VariantContext vc1, VariantContext vc2) {
return VariantContextUtils.getLocation(genomeLocParser,vc1).minDistance(VariantContextUtils.getLocation(genomeLocParser,vc2));
}
/**
@ -354,10 +358,10 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) {
aas.segregationUnknown++;
logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2));
logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2));
}
else if (gt1.isHomRef() || gt2.isHomRef()) {
logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2));
logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2));
aas.eitherNotVariant++;
}
else { // BOTH gt1 and gt2 have at least one variant allele (so either hets, or homozygous variant):
@ -386,7 +390,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
// Check MNPs vs. CHets:
if (containsRefAllele(site1Alleles) && containsRefAllele(site2Alleles)) {
logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2));
logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2));
if (logger.isDebugEnabled() && !(gt1.isHet() && gt2.isHet()))
throw new ReviewedStingException("Since !gt1.isHomRef() && !gt2.isHomRef(), yet both have ref alleles, they BOTH must be hets!");

View File

@ -78,7 +78,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
private void initializeVcfWriter() {
// false <-> don't take control of writer, since didn't create it:
vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
// setup the header fields:

View File

@ -138,7 +138,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
VCFWriter origWriter = writer;
if (enableMergePhasedSegregatingPolymorphismsToMNP) // null <-> use ALL samples, false <-> emit all records, false <-> don't track the statistics of alternate alleles being merged:
writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false);
writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false);
/* Due to discardIrrelevantPhasedSites(), the startDistance spanned by [partiallyPhasedSites.peek(), unphasedSiteQueue.peek()] is <= cacheWindow
Due to processQueue(), the startDistance spanned by [unphasedSiteQueue.peek(), mostDownstreamLocusReached] is <= cacheWindow
@ -197,7 +197,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) {
VariantAndReads vr = new VariantAndReads(vc, context);
unphasedSiteQueue.add(vr);
logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(vr.variant));
logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant));
}
else {
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
@ -226,7 +226,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
while (!unphasedSiteQueue.isEmpty()) {
if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue
VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant;
if (startDistancesAreInWindowRange(mostDownstreamLocusReached, VariantContextUtils.getLocation(nextToPhaseVc))) {
if (startDistancesAreInWindowRange(mostDownstreamLocusReached, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),nextToPhaseVc))) {
/* mostDownstreamLocusReached is still not far enough ahead of nextToPhaseVc to have all phasing information for nextToPhaseVc
(note that we ASSUME that the VCF is ordered by <contig,locus>).
Note that this will always leave at least one entry (the last one), since mostDownstreamLocusReached is in range of itself.
@ -240,7 +240,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
logger.debug("oldPhasedList(1st) = " + toStringVCL(oldPhasedList));
VariantAndReads vr = unphasedSiteQueue.remove();
logger.debug("Performing phasing for " + VariantContextUtils.getLocation(vr.variant));
logger.debug("Performing phasing for " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant));
phaseSite(vr, phaseStats);
}
@ -259,7 +259,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
GenomeLoc nextToPhaseLoc = null;
if (!unphasedSiteQueue.isEmpty())
nextToPhaseLoc = VariantContextUtils.getLocation(unphasedSiteQueue.peek().variant);
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),unphasedSiteQueue.peek().variant);
while (!partiallyPhasedSites.isEmpty()) {
if (nextToPhaseLoc != null) { // otherwise, unphasedSiteQueue.isEmpty(), and therefore no need to keep any of the "past"
@ -284,7 +284,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private void phaseSite(VariantAndReads vr, PhasingStats phaseStats) {
VariantContext vc = vr.variant;
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(vc));
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
UnfinishedVariantAndReads uvr = new UnfinishedVariantAndReads(vr);
UnfinishedVariantContext uvc = uvr.unfinishedVariant;
@ -321,7 +321,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
if (pr.phasingContainsInconsistencies) {
logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
uvc.setPhasingInconsistent();
}
@ -360,7 +360,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
}
if (statsWriter != null)
statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
statsWriter.addStat(samp, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp);
if (sampPhaseCounts == null) {
@ -442,7 +442,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
prevHetAndInteriorIt.previous(); // so that it points to the previous het site [and NOT one after it, due to the last call to next()]
// Add the (het) position to be phased:
GenomeLoc phaseLocus = VariantContextUtils.getLocation(vr.variant);
GenomeLoc phaseLocus = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant);
GenotypeAndReadBases grbPhase = new GenotypeAndReadBases(vr.variant.getGenotype(sample), vr.sampleReadBases.get(sample), phaseLocus);
listHetGenotypes.add(grbPhase);
logger.debug("PHASING het site = " + grbPhase.loc + " [phasingSiteIndex = " + phasingSiteIndex + "]");
@ -456,7 +456,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
break;
}
else if (gt.isHet()) {
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, nextVr.sampleReadBases.get(sample), VariantContextUtils.getLocation(nextVr.variant));
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, nextVr.sampleReadBases.get(sample), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),nextVr.variant));
listHetGenotypes.add(grb);
logger.debug("Using DOWNSTREAM het site = " + grb.loc);
}
@ -922,15 +922,15 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
}
private boolean startDistancesAreInWindowRange(VariantContext vc1, VariantContext vc2) {
return startDistancesAreInWindowRange(VariantContextUtils.getLocation(vc1), VariantContextUtils.getLocation(vc2));
return startDistancesAreInWindowRange(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc1), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc2));
}
private boolean startDistancesAreInWindowRange(GenomeLoc loc1, GenomeLoc loc2) {
return loc1.distance(loc2) <= cacheWindow; // distance() checks: loc1.onSameContig(loc2)
}
private static int startDistance(UnfinishedVariantContext uvc1, VariantContext vc2) {
return uvc1.getLocation().distance(VariantContextUtils.getLocation(vc2));
private int startDistance(UnfinishedVariantContext uvc1, VariantContext vc2) {
return uvc1.getLocation().distance(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc2));
}
public PhasingStats reduce(PhasingStatsAndOutput statsAndList, PhasingStats stats) {
@ -1040,7 +1040,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
}
}
private static class UnfinishedVariantAndReads {
private class UnfinishedVariantAndReads {
public UnfinishedVariantContext unfinishedVariant;
public HashMap<String, ReadBasesAtPosition> sampleReadBases;
@ -1052,11 +1052,11 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// COULD replace with MutableVariantContext if it worked [didn't throw exceptions when trying to call its set() methods]...
private static class UnfinishedVariantContext {
private class UnfinishedVariantContext {
private String name;
private String contig;
private long start;
private long stop;
private int start;
private int stop;
private Collection<Allele> alleles;
private Map<String, Genotype> genotypes;
private double negLog10PError;
@ -1080,7 +1080,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
}
public GenomeLoc getLocation() {
return GenomeLocParser.createGenomeLoc(contig, start, stop);
return getToolkit().getGenomeLocParser().createGenomeLoc(contig, start, stop);
}
public Genotype getGenotype(String sample) {
@ -1110,7 +1110,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
return sb.toString();
}
private static String toStringVCL(List<VariantContext> vcList) {
private String toStringVCL(List<VariantContext> vcList) {
boolean first = true;
StringBuilder sb = new StringBuilder();
for (VariantContext vc : vcList) {
@ -1119,7 +1119,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
else
sb.append(" -- ");
sb.append(VariantContextUtils.getLocation(vc));
sb.append(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
}
return sb.toString();
}
@ -1411,7 +1411,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
public void outputMultipleBaseCounts() {
GenomeLoc nextToPhaseLoc = null;
if (!unphasedSiteQueue.isEmpty())
nextToPhaseLoc = VariantContextUtils.getLocation(unphasedSiteQueue.peek().variant);
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),unphasedSiteQueue.peek().variant);
outputMultipleBaseCounts(nextToPhaseLoc);
}

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -92,8 +94,15 @@ public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<Expandi
GenomeLoc cur = context.getLocation();
if ( verbose && showSkipped ) {
for ( int i = 0; i < context.getSkippedBases(); i++ ) {
out.printf("%s: skipped%n", GenomeLocParser.incPos(cur, i - context.getSkippedBases()));
for(long i = context.getSkippedBases(); i >= 0; i--) {
SAMSequenceDictionary dictionary = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary();
SAMSequenceRecord contig = dictionary.getSequence(cur.getContig());
if(cur.getStop() < contig.getSequenceLength())
cur = getToolkit().getGenomeLocParser().incPos(cur,1);
else
cur = getToolkit().getGenomeLocParser().createGenomeLoc(dictionary.getSequence(contig.getSequenceIndex()+1).getSequenceName(),1,1);
out.printf("%s: skipped%n", cur);
}
}

View File

@ -87,11 +87,11 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
return x;
}
public static String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent)
public String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent)
{
if ( a.size() != b.size() )
return "Sizes not equal";
GenomeLoc featureLocation = GenomeLocParser.createGenomeLoc(b.getChr(),b.getStart(),b.getEnd());
GenomeLoc featureLocation = getToolkit().getGenomeLocParser().createGenomeLoc(b.getChr(),b.getStart(),b.getEnd());
if ( a.getLocation().compareTo(featureLocation) != 0 )
return "Locations not equal";

View File

@ -86,9 +86,9 @@ public class PickSequenomProbes extends RodWalker<String, String> {
logger.info("Loading SNP mask... ");
ReferenceOrderedData snp_mask;
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
RMDTrackBuilder builder = new RMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser());
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
snpMaskIterator = new SeekableRODIterator(iter);
snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter);
} else {
// TODO: fix me when Plink is back
@ -142,8 +142,8 @@ public class PickSequenomProbes extends RodWalker<String, String> {
if ( ! haveMaskForWindow ) {
String contig = context.getLocation().getContig();
long offset = context.getLocation().getStart();
long true_offset = offset - 200;
int offset = context.getLocation().getStart();
int true_offset = offset - 200;
// we have variant; let's load all the snps falling into the current window and prepare the mask array.
// we need to do it only once per window, regardless of how many vcs we may have at this location!
@ -152,7 +152,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
for ( int i = 0 ; i < 401; i++ )
maskFlags[i] = 0;
RODRecordList snpList = snpMaskIterator.seekForward(GenomeLocParser.createGenomeLoc(contig,offset-200,offset+200));
RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200));
if ( snpList != null && snpList.size() != 0 ) {
Iterator<GATKFeature> snpsInWindow = snpList.iterator();
int i = 0;

View File

@ -639,7 +639,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
else if ( group.requiresNovel() && vcKnown )
return false;
if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) )
if ( group.selectExp != null && ! VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, group.selectExp) )
return false;
// nothing invalidated our membership in this set

View File

@ -178,7 +178,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
int iii = 0;
for( final String key : annotationKeys ) {
annotationValues[iii++] = theModel.decodeAnnotation( key, vc, true );
annotationValues[iii++] = theModel.decodeAnnotation( getToolkit().getGenomeLocParser(), key, vc, true );
}
final VariantDatum variantDatum = new VariantDatum();

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@ -425,7 +426,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
}
}
public double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
public double decodeAnnotation(GenomeLocParser genomeLocParser, final String annotationKey, final VariantContext vc, final boolean jitter ) {
double value;
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // HRun values must be jittered a bit to work in this GMM
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
@ -436,18 +437,18 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
try {
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
} catch( Exception e ) {
throw new UserException.MalformedFile(vc.getSource(), "No double value detected for annotation = " + annotationKey + " in variant at " + VariantContextUtils.getLocation(vc) + ", reported annotation value = " + vc.getAttribute( annotationKey ), e );
throw new UserException.MalformedFile(vc.getSource(), "No double value detected for annotation = " + annotationKey + " in variant at " + VariantContextUtils.getLocation(genomeLocParser,vc) + ", reported annotation value = " + vc.getAttribute( annotationKey ), e );
}
}
return value;
}
public final double evaluateVariant( final VariantContext vc ) {
public final double evaluateVariant( GenomeLocParser genomeLocParser, final VariantContext vc ) {
final double[] pVarInCluster = new double[maxGaussians];
final double[] annotations = new double[dataManager.numAnnotations];
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
final double value = decodeAnnotation( dataManager.annotationKeys.get(jjj), vc, false );
final double value = decodeAnnotation( genomeLocParser, dataManager.annotationKeys.get(jjj), vc, false );
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
}

View File

@ -283,7 +283,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
priorCache.put( priorLodFactor, false, priorKey );
}
final double pVar = theModel.evaluateVariant( vc );
final double pVar = theModel.evaluateVariant( ref.getGenomeLocParser(),vc );
final double lod = priorLodFactor + Math.log10(pVar);
variantDatum.qual = Math.abs( QUALITY_SCALE_FACTOR * QualityUtils.lodToPhredScaleErrorRate(lod) );
if( variantDatum.qual > maxQualObserved ) {

View File

@ -129,7 +129,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// get all of the vcf rods at this locus
// Need to provide reference bases to simpleMerge starting at current locus
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption,
VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);

View File

@ -131,7 +131,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
// update if necessary and write
if ( !newCigar.equals(originalCigar) && newCigar.numCigarElements() > 1 ) {
int difference = originalIndex - newCigar.getCigarElement(0).getLength();
GenomeLoc newLoc = GenomeLocParser.createPotentiallyInvalidGenomeLoc(vc.getChr(), vc.getStart()-difference, vc.getEnd()-difference);
GenomeLoc newLoc = getToolkit().getGenomeLocParser().createPotentiallyInvalidGenomeLoc(vc.getChr(), vc.getStart()-difference, vc.getEnd()-difference);
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + newLoc);
VariantContext newVC = VariantContextUtils.modifyLocation(vc, newLoc);

View File

@ -96,7 +96,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
vc = VariantContextUtils.reverseComplement(vc);
}
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
vc = VariantContextUtils.modifyLocation(vc, getToolkit().getGenomeLocParser().createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
if ( originalVC.isSNP() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {

View File

@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
//System.out.printf("%s%n",sub.toString());
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
if ( !VariantContextUtils.match(sub, jexl) ) {
if ( !VariantContextUtils.match(getToolkit().getGenomeLocParser(),sub, jexl) ) {
return 0;
}
}

View File

@ -56,7 +56,7 @@ public class CreateTiTvTrack extends RodWalker<VariantContext,TiTvWindow> {
window.update(VariantContextUtils.isTransition(vc));
if ( window.getTiTv() != null ) {
writer.writeData(VariantContextUtils.getLocation(vc),window.getTiTv());
writer.writeData(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc),window.getTiTv());
}
return window;

View File

@ -287,7 +287,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
}
private void shiftWindows(long pos) {
private void shiftWindows(int pos) {
// we shift windows when there is a read that does not fit into the current window.
// the position, to which the shift is performed, is the first position such that the new read
// can be accomodated. Hence we can safely slide up to pos, only discarding reads that go out of scope -
@ -332,7 +332,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
purgeSignal(pos);
purgeControl(pos);
currentWindow = GenomeLocParser.createGenomeLoc(currentWindow.getContigIndex(),pos,pos+WINDOW_SIZE-1);
currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(currentWindow.getContig(),pos,pos+WINDOW_SIZE-1);
}
@Override
@ -349,7 +349,8 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
controlReadGroups = readGroupSets.get(1);
// System.out.println(controlReadGroups.size()+" read groups in control");
currentWindow = GenomeLocParser.createGenomeLoc(0,1,WINDOW_SIZE);
String sequenceName = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequence(0).getSequenceName();
currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(sequenceName,1,WINDOW_SIZE);
readsInSignalWindow = new LinkedList<SAMRecord>();
readsInControlWindow = new LinkedList<SAMRecord>();
@ -366,7 +367,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
if ( read.getReferenceIndex() > currentWindow.getContigIndex() ) {
printRegion(); // print all we had on the previous contig
currentWindow = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),
currentWindow = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),
read.getAlignmentStart(),
read.getAlignmentStart()+WINDOW_SIZE-1);
currentContig = read.getReferenceName();

View File

@ -95,7 +95,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
}
for ( Map.Entry<String,BEDFeature> additionalGenes : currentBedFeatures.entrySet() ) {
GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd());
GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd());
if ( interval.overlapsP(entryLoc) &&
! additionalGenes.getValue().getName().equals("") &&
! intervalBuffer.get(interval).geneNames.contains(additionalGenes.getKey()+"_"+additionalGenes.getValue().getName())) {
@ -142,7 +142,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
}
for ( Map.Entry<String,BEDFeature> entry : currentBedFeatures.entrySet() ) {
GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd());
GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd());
if ( entryLoc.isBefore(ref.getLocus()) ) {
currentBedFeatures.remove(entry.getKey());
}

View File

@ -39,7 +39,9 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq"));
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(RefseqFileName, e);
}
@ -128,14 +130,14 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
} else {
if ( RefSeqFeature.isCoding(ann) ) {
//b.append(annIntron); // not in exon, but within the coding region = intron
GenomeLoc ig = GenomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
GenomeLoc ig = getToolkit().getGenomeLocParser().createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
GenomeLoc cl = t.getCodingLocation();
GenomeLoc g = t.getLocation();
boolean spliceSiteDisruption = false;
for (GenomeLoc exon : t.getExons()) {
GenomeLoc expandedExon = GenomeLocParser.createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6);
GenomeLoc expandedExon = getToolkit().getGenomeLocParser().createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6);
if (ig.overlapsP(expandedExon)) {
spliceSiteDisruption = true;

View File

@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -71,7 +72,7 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
private void finalUpdate(OverlapTabulator tab) {
while ( ! evalContexts.isEmpty() ) {
tab.update(emptyOverlapTable());
tab.update(emptyOverlapTable(getToolkit().getGenomeLocParser()));
}
}
@ -119,25 +120,25 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
public OverlapTable getOverlapTable(ReferenceContext ref) {
// step 1: check that the eval queue is non-empty and that we are outside the window
if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) {
if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) {
return null;
}
// step 2: discard all comp variations which come before the window
while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(compContexts.get(0)).isBefore(ref.getLocus()) &&
VariantContextUtils.getLocation(compContexts.get(0)).distance(ref.getLocus()) > indelWindow) {
while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).isBefore(ref.getLocus()) &&
VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).distance(ref.getLocus()) > indelWindow) {
compContexts.remove(0);
}
// step 3: see if there are any contexts left; if so then they must be within the window
if ( ! compContexts.isEmpty() ) {
return nonEmptyOverlapTable(ref);
} else {
return emptyOverlapTable();
return emptyOverlapTable(ref.getGenomeLocParser());
}
}
public OverlapTable emptyOverlapTable() {
public OverlapTable emptyOverlapTable(GenomeLocParser genomeLocParser) {
// only eval, no comp
OverlapTable ot = new OverlapTable();
OverlapTable ot = new OverlapTable(genomeLocParser);
ot.setEvalSizeAndType(evalContexts.get(0));
return ot;
}
@ -145,17 +146,17 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
public OverlapTable nonEmptyOverlapTable(ReferenceContext ref) {
if ( vcfWriter != null ) {
int i = 0;
while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).isBefore(VariantContextUtils.getLocation(evalContexts.get(0)))) {
while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).isBefore(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)))) {
vcfWriter.add(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]);
i++;
}
vcfWriter.add(evalContexts.get(0), ref.getBase());
while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).distance(VariantContextUtils.getLocation(evalContexts.get(0))) <= indelWindow) {
while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).distance(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0))) <= indelWindow) {
vcfWriter.add(compContexts.get(i), compContexts.get(i).getReference().getBases()[0]);
i++;
}
}
OverlapTable ot = new OverlapTable();
OverlapTable ot = new OverlapTable(ref.getGenomeLocParser());
ot.setCompOverlaps(compContexts.size());
ot.setDistances(compContexts,evalContexts.get(0), indelWindow);
return ot;
@ -164,13 +165,15 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
}
class OverlapTable {
private GenomeLocParser genomeLocParser;
private int numOverlaps;
private ExpandingArrayList<Integer> distances; // currently unused
private int evalSize;
private boolean isDeletion;
public OverlapTable() {
public OverlapTable(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
numOverlaps = 0;
}
@ -187,8 +190,8 @@ class OverlapTable {
public void setDistances(List<VariantContext> comps, VariantContext eval, int winsize) {
distances = new ExpandingArrayList<Integer>();
for ( VariantContext comp : comps ) {
if ( VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval)) <= winsize ) {
distances.add(VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval)));
if ( VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval)) <= winsize ) {
distances.add(VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval)));
}
}
}

View File

@ -143,7 +143,7 @@ public class IndelErrorRateWalker extends LocusWalker<Integer,Integer> {
// System.out.println("Non countable indel event at "+pileup.getLocation());
countableIndelBuffer.clear();
coverageBuffer.clear(); // we do not want to count observations (read bases) around non-countable indel as well
skipToLoc = GenomeLocParser.createGenomeLoc(pileup.getLocation().getContigIndex(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1);
skipToLoc = ref.getGenomeLocParser().createGenomeLoc(pileup.getLocation().getContig(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1);
// System.out.println("Skip to "+skipToLoc);
} else {
// pileup does not contain too many indels, we need to store them in the buffer and count them later,

View File

@ -83,7 +83,7 @@ public class MarkIntervals extends RodWalker<Long, Long> {
try {
for ( String line : new XReadLines(locs, true) ) {
String parts[] = line.split(":");
badSites.add(GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1])));
badSites.add(getToolkit().getGenomeLocParser().createGenomeLoc(parts[0], Integer.valueOf(parts[1])));
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(locs, e);

View File

@ -237,7 +237,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
}
public GenomeLoc getLocus() {
return VariantContextUtils.getLocation(trio);
return VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),trio);
}
public byte getRefBase() {
@ -623,7 +623,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
logger.info(String.format("%s,%s,%s,%d,%d",entryRegion.getKey(),entryRegion.getValue().regionStart,entryRegion.getValue().lastSeen,
entryRegion.getValue().deNovoSNPsInRegion,entryRegion.getValue().oppositeHomsInRegion));
int chr_end = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(entryRegion.getValue().getContigStr()).getSequenceLength();
entryRegion.getValue().endedBy = GenomeLocParser.createGenomeLoc(entryRegion.getValue().getContigStr(),chr_end,chr_end);
entryRegion.getValue().endedBy = getToolkit().getGenomeLocParser().createGenomeLoc(entryRegion.getValue().getContigStr(),chr_end,chr_end);
to_print.add(entryRegion.getValue());
}
}

View File

@ -58,7 +58,8 @@ import java.io.*;
* This walker is designed to be used in conjunction with NeighborhoodQualityWalker.
*/
public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
public class
ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
@Output
protected PrintStream out;
@Argument(fullName = "inputQualityFile", shortName = "if", doc = "Input quality score file generated by NeighborhoodQualityWalker", required = true)
@ -98,7 +99,7 @@ public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter>
// BUGBUG: This assumes reads will be sorted by start location
float sumNeighborhoodQuality = 0.0f;
int numLines = 0;
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc( read );
GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc( read );
if( readLoc.size() > 0 ) { // only calculate mean NQS if the read has a well formed GenomeLoc, if not NQS will be zero
try {
if( line == null ) {
@ -106,12 +107,12 @@ public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter>
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file is empty" ); }
}
String[] halves = line.split( " ", 2 );
GenomeLoc curLoc = GenomeLocParser.parseGenomeLoc( halves[0] );
GenomeLoc curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] );
while( curLoc.isBefore( readLoc ) ) { // Loop until the beginning of the read
line = inputReader.readLine();
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find beginning of read: " + readLoc ); }
halves = line.split( " ", 2 );
curLoc = GenomeLocParser.parseGenomeLoc( halves[0] );
curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] );
}
// now we have skipped ahead in the input file to where this read starts
logger.debug( "Starting: " + curLoc + ", read: " + readLoc + "\t size: " + readLoc.size() );
@ -124,7 +125,7 @@ public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter>
line = inputReader.readLine();
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find end of read: " + readLoc ); }
halves = line.split( " ", 2 );
curLoc = GenomeLocParser.parseGenomeLoc( halves[0] );
curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] );
}
// now we have parsed the input file up to where the read ends
// reset back to the mark in order to parse the next read in the next call to the reduce function

View File

@ -60,7 +60,7 @@ public class RealignedReadCounter extends ReadWalker<Integer, Integer> {
public void initialize() {
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
intervals = new IntervalFileMergingIterator( new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
intervals = new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
currentInterval = intervals.hasNext() ? intervals.next() : null;
}
@ -69,10 +69,10 @@ public class RealignedReadCounter extends ReadWalker<Integer, Integer> {
return 0;
}
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
GenomeLoc readLoc = ref.getGenomeLocParser().createGenomeLoc(read);
// hack to get around unmapped reads having screwy locations
if ( readLoc.getStop() == 0 )
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart());
readLoc = ref.getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) )
return 0;

View File

@ -130,7 +130,7 @@ public class TestReadFishingWalker extends ReadWalker<Integer,Long> {
else
throw new ReviewedStingException("Invalid indel type: " + type);
aligners.put(GenomeLocParser.createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration()));
aligners.put(getToolkit().getGenomeLocParser().createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration()));
if(++numAlignersCreated % 100 == 0)
out.printf("Created %d aligners in %dms%n",++numAlignersCreated,System.currentTimeMillis()-startTime);
}

View File

@ -28,9 +28,9 @@ public class ValidateRODForReads extends ReadWalker<Integer, Integer> {
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
if (tracker != null) {
Map<Long, Collection<GATKFeature>> mapping = tracker.getContigOffsetMapping();
for (Map.Entry<Long, Collection<GATKFeature>> entry : mapping.entrySet()) {
GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey());
Map<Integer, Collection<GATKFeature>> mapping = tracker.getContigOffsetMapping();
for (Map.Entry<Integer, Collection<GATKFeature>> entry : mapping.entrySet()) {
GenomeLoc location = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),entry.getKey());
if (!map.containsKey(location)) {
map.put(location,0);
}

View File

@ -113,7 +113,7 @@ public class CombineDuplicatesWalker extends DuplicateWalker<List<SAMRecord>, SA
// out.printf("Combining Read %s%n", read.format());
// }
//
combinedRead = DupUtils.combineDuplicates(reads, MAX_QUALITY_SCORE);
combinedRead = DupUtils.combineDuplicates(getToolkit().getGenomeLocParser(),reads, MAX_QUALITY_SCORE);
//out.printf(" => into %s%n", combinedRead.format());
}

View File

@ -468,10 +468,10 @@ public class UnifiedGenotyperEngine {
// if a read is too long for the reference context, extend the context (being sure not to extend past the end of the chromosome)
if ( record.getAlignmentEnd() > refContext.getWindow().getStop() ) {
GenomeLoc window = GenomeLocParser.createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength()));
GenomeLoc window = refContext.getGenomeLocParser().createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength()));
byte[] bases = referenceReader.getSubsequenceAt(window.getContig(), window.getStart(), window.getStop()).getBases();
StringUtil.toUpperCase(bases);
refContext = new ReferenceContext(refContext.getLocus(), window, bases);
refContext = new ReferenceContext(refContext.getGenomeLocParser(),refContext.getLocus(), window, bases);
}
BitSet mismatches = AlignmentUtils.mismatchesInRefWindow(record, refContext, UAC.MAX_MISMATCHES, MISMATCH_WINDOW_SIZE);

View File

@ -136,10 +136,10 @@ public class RemapAlignments extends CommandLineProgram {
}
h.setSequenceDictionary(reference.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
GenomeLocParser genomeLocParser = new GenomeLocParser(reference.getSequenceDictionary());
map = new GenomicMap(10000);
map.read(MAP_FILE);
map.read(genomeLocParser,MAP_FILE);
System.out.println("Map loaded successfully: "+map.size()+" contigs");

View File

@ -40,12 +40,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.utils.*;
@ -95,7 +90,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
* where start, stop are 0 based, closed intervals
* @param f
*/
public void readArachne(File f) {
public void readArachne(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,File f) {
try {
BufferedReader reader = new BufferedReader( new FileReader(f) );
@ -127,9 +122,10 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
for ( int i = 0 ; i < coord_parts.length ; i += 3 ) {
// Arachne map file contains 0-based, closed intervals, hence +1 below.
int index = Integer.parseInt(coord_parts[i]);
long start = Long.parseLong(coord_parts[i+1]);
long stop = Long.parseLong(coord_parts[i+2]);
segments.add(GenomeLocParser.createGenomeLoc(index, start+1, stop+1));
String contig = sequenceDictionary.getSequence(index).getSequenceName();
int start = Integer.parseInt(coord_parts[i+1]);
int stop = Integer.parseInt(coord_parts[i+2]);
segments.add(genomeLocParser.createGenomeLoc(contig, start+1, stop+1));
}
addCustomContig(name, segments);
@ -148,7 +144,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
* where start, stop are 1 based, closed intervals
* @param f
*/
public void read(File f) {
public void read(GenomeLocParser genomeLocParser,File f) {
try {
BufferedReader reader = new BufferedReader( new FileReader(f) );
@ -171,7 +167,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
while ( p2 < line.length() && line.charAt(p2) != ',') p2++; // next comma or end-of-line
while ( p2 != p1 ) {
GenomeLoc newSegment = GenomeLocParser.parseGenomeLoc(line.substring(p1, p2));
GenomeLoc newSegment = genomeLocParser.parseGenomeLoc(line.substring(p1, p2));
if ( segments.size() > 0 &&
segments.get(segments.size()-1).getStop()+1 == newSegment.getStart() &&
segments.get(segments.size()-1).getContigIndex() == newSegment.getContigIndex())
@ -408,7 +404,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
SAMRecord r = new SAMRecord(reader.getFileHeader());
GenomeLocParser.setupRefContigOrdering(reader.getFileHeader().getSequenceDictionary());
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
r.setReferenceName("ENST00000378466");
r.setAlignmentStart(1235);
@ -421,9 +417,9 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
GenomicMap m = new GenomicMap(5);
// m.readArachne(new File("/humgen/gsa-scr1/asivache/cDNA/Ensembl48.transcriptome.map"));
// m.readArachne(genomeLocParser,new File("/humgen/gsa-scr1/asivache/cDNA/Ensembl48.transcriptome.map"));
// m.write(new File("/humgen/gsa-scr1/asivache/cDNA/new_pipeline/Ensembl48.new.transcriptome.map"));
m.read(new File("W:/berger/cDNA_BAM/refs/Ensembl52.plus.Genome.map"));
m.read(genomeLocParser,new File("W:/berger/cDNA_BAM/refs/Ensembl52.plus.Genome.map"));
m.remapToMasterReference(r,reader.getFileHeader(),true);

View File

@ -13,7 +13,7 @@ import java.io.Serializable;
* Date: Mar 2, 2009
* Time: 8:50:11 AM
*
* Genome location representation. It is *** 1 *** based
* Genome location representation. It is *** 1 *** based closed.
*
*
*/
@ -23,8 +23,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
* start and stop position, and (optionally) the contig name
*/
protected final int contigIndex;
protected final long start;
protected final long stop;
protected final int start;
protected final int stop;
protected final String contigName;
// --------------------------------------------------------------------------------------------------------------
@ -32,93 +32,25 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
// constructors
//
// --------------------------------------------------------------------------------------------------------------
/*GenomeLoc( int contigIndex, final long start, final long stop ) {
MAX_CONTIG = Integer.MAX_VALUE;
if (start < 0) { throw new StingException("Bad start position " + start);}
if (stop < -1) { throw new StingException("Bad stop position " + stop); } // a negative -1 indicates it's not a meaningful end position
this.contigIndex = contigIndex;
this.start = start;
this.contigName = null; // we just don't know
this.stop = stop == -1 ? start : stop;
}*/
protected GenomeLoc(final SAMRecord read) {
this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
}
protected GenomeLoc( final String contig, final int contigIndex, final long start, final long stop ) {
protected GenomeLoc( final String contig, final int contigIndex, final int start, final int stop ) {
this.contigName = contig;
this.contigIndex = contigIndex;
this.start = start;
this.stop = stop;
}
/*GenomeLoc( final int contig, final long pos ) {
this(contig, pos, pos );
}
*/
protected GenomeLoc( final GenomeLoc toCopy ) {
this( toCopy.getContig(), toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
}
/**
* Returns true if we have a specified series of locations to process AND we are past the last
* location in the list. It means that, in a serial processing of the genome, that we are done.
*
* @param curr Current genome Location
* @param locs a list of genomic locations
* @return true if we are past the last location to process
* Return a new GenomeLoc at this same position.
* @return A GenomeLoc with the same contents as the current loc.
*/
public static boolean pastFinalLocation(GenomeLoc curr, List<GenomeLoc> locs) {
return (locs.size() > 0 && curr.isPast(locs.get(locs.size() - 1)));
}
/**
* A key function that returns true if the proposed GenomeLoc curr is within the list of
* locations we are processing in this TraversalEngine
*
* @param curr the current location
* @param locs a list of genomic locations
* @return true if we should process GenomeLoc curr, otherwise false
*/
public static boolean inLocations(GenomeLoc curr, ArrayList<GenomeLoc> locs) {
if ( locs.size() == 0 ) {
return true;
} else {
for ( GenomeLoc loc : locs ) {
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
if (loc.overlapsP(curr))
return true;
}
return false;
}
}
public static void removePastLocs(GenomeLoc curr, List<GenomeLoc> locs) {
while ( !locs.isEmpty() && curr.isPast(locs.get(0)) ) {
//System.out.println("At: " + curr + ", removing: " + locs.get(0));
locs.remove(0);
}
}
public static boolean overlapswithSortedLocsP(GenomeLoc curr, List<GenomeLoc> locs, boolean returnTrueIfEmpty) {
if ( locs.isEmpty() )
return returnTrueIfEmpty;
// skip loci before intervals begin
if ( curr.contigIndex < locs.get(0).contigIndex )
return false;
for ( GenomeLoc loc : locs ) {
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
if ( loc.overlapsP(curr) )
return true;
if ( curr.compareTo(loc) < 0 )
return false;
}
return false;
@Override
public GenomeLoc clone() {
return new GenomeLoc(getContig(),getContigIndex(),getStart(),getStop());
}
//
@ -129,8 +61,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
}
public final int getContigIndex() { return this.contigIndex; }
public final long getStart() { return this.start; }
public final long getStop() { return this.stop; }
public final int getStart() { return this.start; }
public final int getStop() { return this.stop; }
public final String toString() {
if ( throughEndOfContigP() && atBeginningOfContigP() )
return getContig();
@ -139,13 +71,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
else
return String.format("%s:%d-%d", getContig(), getStart(), getStop());
}
public final boolean isUnmapped() { return this.contigIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; }
public final boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; }
public final boolean atBeginningOfContigP() { return this.start == 1; }
public final boolean isSingleBP() { return stop == start; }
private boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; }
private boolean atBeginningOfContigP() { return this.start == 1; }
public final boolean disjointP(GenomeLoc that) {
return this.contigIndex != that.contigIndex || this.start > that.stop || that.start > this.stop;
@ -187,15 +114,6 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop();
}
/**
* Returns true if this GenomeLoc contains the start position of GenomeLoc that, on the same contig
* @param start
* @return
*/
public final boolean containsStartPosition(long start) {
return getStart() <= start && start <= getStop();
}
public final boolean onSameContig(GenomeLoc that) {
return (this.contigIndex == that.contigIndex);
}
@ -215,26 +133,26 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
return this.compareTo(left) > -1 && this.compareTo(right) < 1;
}
/**
* Tests whether this contig is completely before contig 'that'.
* @param that Contig to test against.
* @return true if this contig ends before 'that' starts; false if this is completely after or overlaps 'that'.
*/
public final boolean isBefore( GenomeLoc that ) {
int comparison = this.compareContigs(that);
return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() ));
}
/**
* Tests whether this contig is completely after contig 'that'.
* @param that Contig to test against.
* @return true if this contig starts after 'that' ends; false if this is completely before or overlaps 'that'.
*/
public final boolean isPast( GenomeLoc that ) {
int comparison = this.compareContigs(that);
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() ));
}
public final boolean startsBefore( GenomeLoc that ) {
int comparison = this.compareContigs(that);
return ( comparison == -1 || ( comparison == 0 && this.getStart() < that.getStart() ));
}
public final boolean startsAfter( GenomeLoc that ) {
int comparison = this.compareContigs(that);
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStart() ));
}
// Return the minimum distance between any pair of bases in this and that GenomeLocs:
public final int minDistance( final GenomeLoc that ) {
if (!this.onSameContig(that))
@ -281,15 +199,6 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
}
/**
* Return a new GenomeLoc at this same position.
* @return A GenomeLoc with the same contents as the current loc.
*/
@Override
public GenomeLoc clone() {
return new GenomeLoc(this);
}
/**
* conpare this genomeLoc's contig to another genome loc
* @param that the genome loc to compare contigs with

View File

@ -57,24 +57,33 @@ import org.broadinstitute.sting.utils.text.XReadLines;
public class GenomeLocParser {
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
//private static final Pattern mPattern = Pattern.compile("([\\p{Print}&&[^:]]+):*([\\d,]+)?([\\+-])?([\\d,]+)?$"); // matches case 3
// --------------------------------------------------------------------------------------------------------------
//
// Ugly global variable defining the optional ordering of contig elements
//
// --------------------------------------------------------------------------------------------------------------
//public static Map<String, Integer> refContigOrdering = null;
protected static SAMSequenceDictionary contigInfo = null;
protected SAMSequenceDictionary contigInfo = null;
/**
* do we have a contig ordering setup?
*
* @return true if the contig order is setup
* set our internal reference contig order
* @param refFile the reference file
*/
public static boolean hasKnownContigOrdering() {
return contigInfo != null;
public GenomeLocParser(final ReferenceSequenceFile refFile) {
this(refFile.getSequenceDictionary());
}
public GenomeLocParser(SAMSequenceDictionary seqDict) {
if (seqDict == null) { // we couldn't load the reference dictionary
//logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
throw new UserException.CommandLineException("Failed to load reference dictionary");
} else if (contigInfo == null) {
contigInfo = seqDict;
logger.debug(String.format("Prepared reference sequence contig dictionary"));
for (SAMSequenceRecord contig : seqDict.getSequences()) {
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
}
}
}
/**
@ -84,7 +93,7 @@ public class GenomeLocParser {
*
* @return the sam sequence record
*/
public static SAMSequenceRecord getContigInfo(final String contig) {
public SAMSequenceRecord getContigInfo(final String contig) {
return contigInfo.getSequence(contig);
}
@ -96,53 +105,13 @@ public class GenomeLocParser {
*
* @return the contig index, -1 if not found
*/
public static int getContigIndex(final String contig, boolean exceptionOut) {
public int getContigIndex(final String contig, boolean exceptionOut) {
if (contigInfo.getSequenceIndex(contig) == -1 && exceptionOut)
throw new UserException.CommandLineException(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
return contigInfo.getSequenceIndex(contig);
}
/**
* set our internal reference contig order
*
* @param refFile the reference file
*
* @return true if we were successful
*/
public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) {
return setupRefContigOrdering(refFile.getSequenceDictionary());
}
/**
* setup our internal reference contig order
*
* @param seqDict the sequence dictionary
*
* @return true if we were successful
*/
public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) {
if (seqDict == null) { // we couldn't load the reference dictionary
//logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
throw new UserException.CommandLineException("Failed to load reference dictionary");
} else if (contigInfo == null) {
contigInfo = seqDict;
logger.debug(String.format("Prepared reference sequence contig dictionary"));
for (SAMSequenceRecord contig : seqDict.getSequences()) {
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
}
}
return true;
}
/**
* A package-protected method that can be used by the test system to reset the sequence dictionary
* being used. Use this method sparingly.
*/
static void clearRefContigOrdering() {
contigInfo = null;
}
/**
* parse a genome interval, from a location string
*
@ -155,7 +124,7 @@ public class GenomeLocParser {
*
*/
public static GenomeLoc parseGenomeInterval(final String str) {
public GenomeLoc parseGenomeInterval(final String str) {
GenomeLoc ret = parseGenomeLoc(str);
exceptionOnInvalidGenomeLocBounds(ret);
return ret;
@ -173,13 +142,13 @@ public class GenomeLocParser {
* @return a GenomeLoc representing the String
*
*/
public static GenomeLoc parseGenomeLoc(final String str) {
public GenomeLoc parseGenomeLoc(final String str) {
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
//System.out.printf("Parsing location '%s'%n", str);
String contig = null;
long start = 1;
long stop = -1;
int start = 1;
int stop = -1;
final int colonIndex = str.indexOf(":");
if(colonIndex == -1) {
@ -210,7 +179,7 @@ public class GenomeLocParser {
if (!isContigValid(contig))
throw new UserException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering())
if (stop == Integer.MAX_VALUE)
// lookup the actually stop position!
stop = getContigInfo(contig).getSequenceLength();
@ -228,7 +197,7 @@ public class GenomeLocParser {
* Parses a number like 1,000,000 into a long.
* @param pos
*/
private static long parsePosition(final String pos) {
private int parsePosition(final String pos) {
//String x = pos.replaceAll(",", ""); - this was replaced because it uses regexps
//System.out.println("Parsing position: '" + pos + "'");
if(pos.indexOf('-') != -1) {
@ -244,13 +213,13 @@ public class GenomeLocParser {
continue;
} else if(c < '0' || c > '9') {
throw new NumberFormatException("Position: '" + pos + "' contains invalid chars." );
} else {
} else {
buffer.append(c);
}
}
return Long.parseLong(buffer.toString());
return Integer.parseInt(buffer.toString());
} else {
return Long.parseLong(pos);
return Integer.parseInt(pos);
}
}
@ -263,7 +232,7 @@ public class GenomeLocParser {
*
* @return the list of merged locations
*/
public static List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
public List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
if (raw.size() <= 1)
return raw;
else {
@ -292,7 +261,7 @@ public class GenomeLocParser {
*
* @return True if the contig is valid. False otherwise.
*/
private static boolean isContigValid(String contig) {
private boolean isContigValid(String contig) {
int contigIndex = contigInfo.getSequenceIndex(contig);
return contigIndex >= 0 && contigIndex < contigInfo.size();
}
@ -309,7 +278,7 @@ public class GenomeLocParser {
* Validation: only checks that contig is valid
* start/stop could be anything
*/
public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) {
public GenomeLoc parseGenomeLoc(final String contig, int start, int stop) {
if (!isContigValid(contig))
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
return new GenomeLoc(contig, getContigIndex(contig,true), start, stop);
@ -327,7 +296,7 @@ public class GenomeLocParser {
* @param allowEmptyIntervalList if false empty interval lists will return null
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
*/
public static List<GenomeLoc> intervalFileToList(final String file_name, boolean allowEmptyIntervalList) {
public List<GenomeLoc> intervalFileToList(final String file_name, boolean allowEmptyIntervalList) {
// try to open file
File inputFile = new File(file_name);
@ -344,7 +313,7 @@ public class GenomeLocParser {
// case: BED file
if (file_name.toUpperCase().endsWith(".BED")) {
BedParser parser = new BedParser(inputFile);
BedParser parser = new BedParser(this,inputFile);
return parser.getLocations();
}
@ -393,22 +362,8 @@ public class GenomeLocParser {
*
* @return the string that represents that contig name
*/
private static String getSequenceNameFromIndex(int contigIndex) {
return GenomeLocParser.contigInfo.getSequence(contigIndex).getSequenceName();
}
/**
* create a genome loc, given the contig name, start, and stop
*
* @param contig the contig name
* @param start the starting position
* @param stop the stop position
*
* @return a new genome loc
*/
public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) {
checkSetup();
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), start, stop));
private String getSequenceNameFromIndex(int contigIndex) {
return contigInfo.getSequence(contigIndex).getSequenceName();
}
/**
@ -420,31 +375,21 @@ public class GenomeLocParser {
*
* @return a new genome loc - but don't exception out if it is invalid
*/
public static GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final long start, final long stop) {
checkSetup();
return new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,false), start, stop);
public GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final int start, final int stop) {
return new GenomeLoc(contig, getContigIndex(contig,false), start, stop);
}
/**
* create a genome loc, given the contig index, start, and stop
* create a genome loc, given the contig name, start, and stop
*
* @param contigIndex the contig index
* @param start the start position
* @param stop the stop position
* @param contig the contig name
* @param start the starting position
* @param stop the stop position
*
* @return a new genome loc
*/
public static GenomeLoc createGenomeLoc(int contigIndex, final long start, final long stop) {
checkSetup();
if (start < 0) {
throw new ReviewedStingException("Bad start position " + start);
}
if (stop < -1) {
throw new ReviewedStingException("Bad stop position " + stop);
} // a negative -1 indicates it's not a meaningful end position
return new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop);
public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) {
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), start, stop));
}
/**
@ -454,25 +399,10 @@ public class GenomeLocParser {
*
* @return
*/
public static GenomeLoc createGenomeLoc(final SAMRecord read) {
checkSetup();
public GenomeLoc createGenomeLoc(final SAMRecord read) {
return exceptionOnInvalidGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()));
}
/**
* create a new genome loc, given the contig position, and a single position
*
* @param contig the contig name
* @param pos the postion
*
* @return a genome loc representing a single base at the specified postion on the contig
*/
public static GenomeLoc createGenomeLoc(final int contig, final long pos) {
checkSetup();
return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos));
}
/**
* create a new genome loc, given the contig name, and a single position
*
@ -481,14 +411,8 @@ public class GenomeLocParser {
*
* @return a genome loc representing a single base at the specified postion on the contig
*/
public static GenomeLoc createGenomeLoc(final String contig, final long pos) {
checkSetup();
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), pos, pos));
}
public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) {
checkSetup();
return exceptionOnInvalidGenomeLoc(new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop()));
public GenomeLoc createGenomeLoc(final String contig, final int pos) {
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), pos, pos));
}
/**
@ -505,7 +429,7 @@ public class GenomeLocParser {
* @return the genome loc if it's valid, otherwise we throw an exception
*
*/
private static GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) {
private GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) {
if (toReturn.getStart() < 0) {
throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0");
}
@ -534,7 +458,7 @@ public class GenomeLocParser {
*
* @param locus Locus to verify.
*/
private static void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) {
private void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) {
int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength();
if(locus.getStart() > contigSize)
throw new ReviewedStingException(String.format("GenomeLoc is invalid: locus start %d is after the end of contig %s",locus.getStart(),locus.getContig()));
@ -554,8 +478,7 @@ public class GenomeLocParser {
*
* performs interval-style validation: contig is valid and atart and stop less than the end
*/
public static boolean validGenomeLoc(GenomeLoc loc) {
checkSetup();
public boolean validGenomeLoc(GenomeLoc loc) {
// quick check before we get the contig size, is the contig number valid
if ((loc.getContigIndex() < 0) || // the contig index has to be positive
(loc.getContigIndex() >= contigInfo.getSequences().size())) // the contig must be in the integer range of contigs)
@ -583,9 +506,8 @@ public class GenomeLocParser {
*
* performs interval-style validation: contig is valid and atart and stop less than the end
*/
public static boolean validGenomeLoc(String contig, long start, long stop) {
checkSetup();
return validGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig, false), start, stop));
public boolean validGenomeLoc(String contig, int start, int stop) {
return validGenomeLoc(new GenomeLoc(contig, getContigIndex(contig, false), start, stop));
}
@ -600,58 +522,11 @@ public class GenomeLocParser {
*
* performs interval-style validation: contig is valid and atart and stop less than the end
*/
public static boolean validGenomeLoc(int contigIndex, long start, long stop) {
checkSetup();
public boolean validGenomeLoc(int contigIndex, int start, int stop) {
if (contigIndex < 0 || contigIndex >= contigInfo.size()) return false;
return validGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop));
}
/**
* Move this Genome loc to the next contig, with a start
* and stop of 1.
*
* @return true if we are not out of contigs, otherwise false if we're
* at the end of the genome (no more contigs to jump to).
*/
public static GenomeLoc toNextContig(GenomeLoc current) {
if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) {
return null;
} else
return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1));
}
/**
* create a new genome loc, given an old location and a new contig
*
* @param loc the old location
* @param contig the new contig to set
*
* @return a new genome loc with an updated contig name and index
*/
public static GenomeLoc setContig(GenomeLoc loc, String contig) {
checkSetup();
int index = -1;
if ((index = contigInfo.getSequenceIndex(contig)) < 0) {
throw new ReviewedStingException("Contig name ( " + contig + " ) not in the set sequence dictionary.");
}
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, index, loc.start, loc.getStop()));
}
/**
* Sets contig index. UNSAFE since it 1) does NOT update contig name; 2) does not validate the index
*
* @param contig
*/
public static GenomeLoc setContigIndex(GenomeLoc loc, int contig) {
checkSetup();
if ((contig >= GenomeLocParser.contigInfo.getSequences().size()) || (contig < 0)) {
throw new ReviewedStingException("Contig index ( " + contig + " ) is not in the sequence dictionary set.");
}
return exceptionOnInvalidGenomeLoc(new GenomeLoc(GenomeLocParser.contigInfo.getSequence(contig).getSequenceName(), contig, loc.start, loc.getStop()));
}
/**
* create a new genome loc from an existing loc, with a new start position
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
@ -662,8 +537,7 @@ public class GenomeLocParser {
*
* @return the newly created genome loc
*/
public static GenomeLoc setStart(GenomeLoc loc, long start) {
checkSetup();
public GenomeLoc setStart(GenomeLoc loc, int start) {
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()));
}
@ -677,8 +551,7 @@ public class GenomeLocParser {
*
* @return
*/
public static GenomeLoc setStop(GenomeLoc loc, long stop) {
checkSetup();
public GenomeLoc setStop(GenomeLoc loc, int stop) {
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop));
}
@ -689,7 +562,7 @@ public class GenomeLocParser {
*
* @return a new genome loc
*/
public static GenomeLoc incPos(GenomeLoc loc) {
public GenomeLoc incPos(GenomeLoc loc) {
return incPos(loc, 1);
}
@ -701,41 +574,19 @@ public class GenomeLocParser {
*
* @return a new genome loc
*/
public static GenomeLoc incPos(GenomeLoc loc, long by) {
public GenomeLoc incPos(GenomeLoc loc, int by) {
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by));
}
/**
* create a new genome loc with an incremented position
*
* @param loc the location
*
* @return a new genome loc
* Creates a GenomeLoc than spans the entire contig.
* @param contigName Name of the contig.
* @return A locus spanning the entire contig.
*/
public static GenomeLoc nextLoc(GenomeLoc loc) {
return incPos(loc);
}
/** check to make sure that we've setup the contig information */
private static void checkSetup() {
if (contigInfo == null) {
throw new ReviewedStingException("The GenomeLocParser hasn't been setup with a contig sequence yet");
}
}
/**
* compare two contig names, in the current context
*
* @param firstContig
* @param secondContig
*
* @return
*/
public static int compareContigs(String firstContig, String secondContig) {
checkSetup();
Integer ref1 = GenomeLocParser.getContigIndex(firstContig,true);
Integer ref2 = GenomeLocParser.getContigIndex(secondContig,true);
return ref1.compareTo(ref2);
}
public GenomeLoc createOverEntireContig(String contigName) {
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
if(contig == null)
throw new ReviewedStingException("Unable to find contig named " + contigName);
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength()));
}
}

View File

@ -38,25 +38,36 @@ import java.util.*;
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
private GenomeLocParser genomeLocParser;
// our private storage for the GenomeLoc's
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
/** default constructor */
public GenomeLocSortedSet() {
public GenomeLocSortedSet(GenomeLocParser parser) {
this.genomeLocParser = parser;
}
public GenomeLocSortedSet(GenomeLoc e) {
this();
public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) {
this(parser);
add(e);
}
public GenomeLocSortedSet(Collection<GenomeLoc> l) {
this();
public GenomeLocSortedSet(GenomeLocParser parser,Collection<GenomeLoc> l) {
this(parser);
for ( GenomeLoc e : l )
add(e);
}
/**
* Gets the GenomeLocParser used to create this sorted set.
* @return The parser. Will never be null.
*/
public GenomeLocParser getGenomeLocParser() {
return genomeLocParser;
}
/**
* get an iterator over this collection
*
@ -201,7 +212,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
logger.debug("removeRegions operation: i = " + i);
}
return GenomeLocSortedSet.createSetFromList(good);
return createSetFromList(genomeLocParser,good);
}
private static final List<GenomeLoc> EMPTY_LIST = new ArrayList<GenomeLoc>();
@ -221,8 +232,8 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* |------| + |--------|
*
*/
GenomeLoc before = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
GenomeLoc after = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1);
GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop());
if (after.getStop() - after.getStart() >= 0) {
l.add(after);
}
@ -255,9 +266,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
GenomeLoc n;
if (e.getStart() < g.getStart()) {
n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
n = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop());
} else {
n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
n = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1);
}
// replace g with the new region
@ -283,9 +294,10 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* @return the GenomeLocSet of all references sequences as GenomeLoc's
*/
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
GenomeLocParser parser = new GenomeLocParser(dict);
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(parser);
for (SAMSequenceRecord record : dict.getSequences()) {
returnSortedSet.add(GenomeLocParser.createGenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength()));
returnSortedSet.add(parser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength()));
}
return returnSortedSet;
}
@ -297,8 +309,8 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
*
* @return the sorted genome loc list
*/
public static GenomeLocSortedSet createSetFromList(List<GenomeLoc> locs) {
GenomeLocSortedSet set = new GenomeLocSortedSet();
public static GenomeLocSortedSet createSetFromList(GenomeLocParser parser,List<GenomeLoc> locs) {
GenomeLocSortedSet set = new GenomeLocSortedSet(parser);
set.addAll(locs);
return set;
}
@ -307,13 +319,13 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
/**
* return a deep copy of this collection.
*
* @return a new GenomeLocSortedSet, indentical to the current GenomeLocSortedSet.
* @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet.
*/
public GenomeLocSortedSet clone() {
GenomeLocSortedSet ret = new GenomeLocSortedSet();
GenomeLocSortedSet ret = new GenomeLocSortedSet(genomeLocParser);
for (GenomeLoc loc : this.mArray) {
// ensure a deep copy
ret.mArray.add(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()));
ret.mArray.add(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop()));
}
return ret;
}

View File

@ -20,6 +20,8 @@ public class BedParser {
// the buffered reader input
private final BufferedReader mIn;
private GenomeLocParser genomeLocParser;
// our array of locations
private List<GenomeLoc> mLocations;
@ -28,7 +30,8 @@ public class BedParser {
*
* @param fl
*/
public BedParser(File fl) {
public BedParser(GenomeLocParser genomeLocParser,File fl) {
this.genomeLocParser = genomeLocParser;
try {
mIn = new BufferedReader(new FileReader(fl));
} catch (FileNotFoundException e) {
@ -57,7 +60,7 @@ public class BedParser {
List<GenomeLoc> locArray = new ArrayList<GenomeLoc>();
try {
while ((line = mIn.readLine()) != null) {
locArray.add(parseLocation(line));
locArray.add(parseLocation(genomeLocParser,line));
}
} catch (IOException e) {
throw new UserException.MalformedFile("Unable to parse line in BED file.");
@ -71,7 +74,7 @@ public class BedParser {
* @param line the line, as a string
* @return a parsed genome loc
*/
public static GenomeLoc parseLocation(String line) {
public static GenomeLoc parseLocation(GenomeLocParser genomeLocParser,String line) {
String contig;
int start;
int stop;
@ -85,7 +88,7 @@ public class BedParser {
}
// we currently drop the rest of the bed record, which can contain names, scores, etc
return GenomeLocParser.createGenomeLoc(contig, start, stop);
return genomeLocParser.createGenomeLoc(contig, start, stop);
}

View File

@ -45,7 +45,7 @@ public class DupUtils {
}
}
public static SAMRecord combineDuplicates(List<SAMRecord> duplicates, int maxQScore) {
public static SAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int maxQScore) {
if ( duplicates.size() == 0 )
return null;
@ -63,7 +63,7 @@ public class DupUtils {
//for ( SAMRecord read : duplicates ) {
// System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]);
//}
Pair<Byte, Byte> baseAndQual = combineBaseProbs(duplicates, i, maxQScore);
Pair<Byte, Byte> baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore);
bases[i] = baseAndQual.getFirst();
quals[i] = baseAndQual.getSecond();
}
@ -114,8 +114,8 @@ public class DupUtils {
System.out.printf("%n");
}
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0));
private static Pair<Byte, Byte> combineBaseProbs(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int readOffset, int maxQScore) {
GenomeLoc loc = genomeLocParser.createGenomeLoc(duplicates.get(0));
ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset);
final boolean debug = false;

View File

@ -114,11 +114,10 @@ public class Haplotype {
// Create location for all haplotypes
long startLoc = ref.getWindow().getStart() + startIdxInReference;
long stopLoc = startLoc + haplotypeSize-1;
int startLoc = ref.getWindow().getStart() + startIdxInReference;
int stopLoc = startLoc + haplotypeSize-1;
GenomeLoc locus = GenomeLocParser.createGenomeLoc(ref.getLocus().getContigIndex(),startLoc,
stopLoc);
GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
for (Allele a : vc.getAlleles()) {

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.utils.interval;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
@ -56,17 +57,17 @@ public class IntervalFileMergingIterator implements Iterator<GenomeLoc> {
private IntervalMergingRule myRule;
private File myFile;
public IntervalFileMergingIterator(File f, IntervalMergingRule rule) {
public IntervalFileMergingIterator(GenomeLocParser genomeLocParser,File f, IntervalMergingRule rule) {
myFile = f;
try {
XReadLines reader = new XReadLines(f);
if (f.getName().toUpperCase().endsWith(".BED")) {
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( reader.iterator(),
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(),
StringToGenomeLocIteratorAdapter.FORMAT.BED ) ) ;
} else {
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( reader.iterator(),
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(),
StringToGenomeLocIteratorAdapter.FORMAT.GATK ) ) ;
}
} catch ( FileNotFoundException e ) {

View File

@ -29,7 +29,7 @@ public class IntervalUtils {
* @param allowEmptyIntervalList If false instead of an empty interval list will return null.
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
*/
public static List<GenomeLoc> parseIntervalArguments(List<String> argList, boolean allowEmptyIntervalList) {
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList, boolean allowEmptyIntervalList) {
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
@ -51,7 +51,7 @@ public class IntervalUtils {
// if it's a file, add items to raw interval list
if (isIntervalFile(fileOrInterval)) {
try {
rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval, allowEmptyIntervalList));
rawIntervals.addAll(parser.intervalFileToList(fileOrInterval, allowEmptyIntervalList));
}
catch (Exception e) {
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e);
@ -60,7 +60,7 @@ public class IntervalUtils {
// otherwise treat as an interval -> parse and add to raw interval list
else {
rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval));
rawIntervals.add(parser.parseGenomeInterval(fileOrInterval));
}
}
}
@ -121,13 +121,13 @@ public class IntervalUtils {
* @param mergingRule A descriptor for the type of merging to perform.
* @return A sorted, merged version of the intervals passed in.
*/
public static GenomeLocSortedSet sortAndMergeIntervals(List<GenomeLoc> intervals, IntervalMergingRule mergingRule) {
public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List<GenomeLoc> intervals, IntervalMergingRule mergingRule) {
// sort raw interval list
Collections.sort(intervals);
// now merge raw interval list
intervals = GenomeLocParser.mergeIntervalLocations(intervals, mergingRule);
intervals = parser.mergeIntervalLocations(intervals, mergingRule);
return GenomeLocSortedSet.createSetFromList(intervals);
return GenomeLocSortedSet.createSetFromList(parser,intervals);
}
/**

View File

@ -90,10 +90,6 @@ public class ExtendedEventPileupElement extends PileupElement {
public Type getType() { return type; }
public GenomeLoc getLocation() {
return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength);
}
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
// a valid base quality. The following code attempts to compensate for that.'

View File

@ -1,9 +1,6 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordIterator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.*;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
@ -31,17 +28,23 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
*/
public class ArtificialSAMFileReader extends SAMFileReader {
/**
* The parser, for GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
/**
* Backing data store of reads.
*/
private List<SAMRecord> reads = null;
private final List<SAMRecord> reads;
/**
* Construct an artificial SAM file reader.
* @param reads Reads to use as backing data source.
*/
public ArtificialSAMFileReader(SAMRecord... reads) {
public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) {
super( createEmptyInputStream(),true );
this.genomeLocParser = new GenomeLocParser(sequenceDictionary);
this.reads = Arrays.asList(reads);
}
@ -50,11 +53,11 @@ public class ArtificialSAMFileReader extends SAMFileReader {
*/
@Override
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
GenomeLoc region = GenomeLocParser.createGenomeLoc(sequence, start, end);
GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end);
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>();
for( SAMRecord read: reads ) {
GenomeLoc readPosition = GenomeLocParser.createGenomeLoc(read);
GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read);
if( contained && region.containsP(readPosition) ) coveredSubset.add(read);
else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read);
}

Some files were not shown because too many files have changed in this diff Show More