diff --git a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java index f56e14389..0c7e29304 100755 --- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java @@ -48,7 +48,6 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; @@ -69,6 +68,11 @@ public abstract class AbstractGenomeAnalysisEngine { */ private ParsingEngine parsingEngine; + /** + * The genomeLocParser can create and parse GenomeLocs. + */ + private GenomeLocParser genomeLocParser; + /** * Accessor for sharded read data. */ @@ -82,6 +86,10 @@ public abstract class AbstractGenomeAnalysisEngine { return referenceDataSource; } + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * Accessor for sharded reference data. */ @@ -136,6 +144,14 @@ public abstract class AbstractGenomeAnalysisEngine { this.parsingEngine = parsingEngine; } + /** + * Explicitly set the GenomeLocParser, for unit testing. + * @param genomeLocParser GenomeLocParser to use. + */ + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + /** * Actually run the engine. * @return the value of this traversal. @@ -188,7 +204,7 @@ public abstract class AbstractGenomeAnalysisEngine { GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) : loadIntervals(argCollection.intervals, argCollection.intervalMerging, - GenomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging))); + genomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging))); // if no exclude arguments, can return parseIntervalArguments directly if (argCollection.excludeIntervals == null) @@ -221,11 +237,11 @@ public abstract class AbstractGenomeAnalysisEngine { IntervalMergingRule mergingRule, List additionalIntervals) { - return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals, - IntervalUtils.parseIntervalArguments(argList, - this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), - argCollection.BTIMergeRule), - mergingRule); + return IntervalUtils.sortAndMergeIntervals(genomeLocParser,IntervalUtils.mergeListsBySetOperator(additionalIntervals, + IntervalUtils.parseIntervalArguments(genomeLocParser,argList, + this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), + argCollection.BTIMergeRule), + mergingRule); } /** @@ -298,22 +314,22 @@ public abstract class AbstractGenomeAnalysisEngine { protected void initializeDataSources() { logger.info("Strictness is " + argCollection.strictnessLevel); + validateSuppliedReference(); + referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); + validateSuppliedReads(); - readsDataSource = createReadsDataSource(); + readsDataSource = createReadsDataSource(genomeLocParser); for (SamRecordFilter filter : filters) if (filter instanceof SamRecordHeaderFilter) ((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader()); - validateSuppliedReference(); - referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); - sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles); RMDTrackBuilder manager = new RMDTrackBuilder(); // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference - manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()); + manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser); List tracks = manager.getReferenceMetaDataSources(this,argCollection); validateSuppliedReferenceOrderedData(tracks); @@ -330,7 +346,7 @@ public abstract class AbstractGenomeAnalysisEngine { * @return A unique identifier for the source file of this read. Exception if not found. */ public SAMReaderID getReaderIDForRead(final SAMRecord read) { - return getDataSource().getReaderID(read); + return getReadsDataSource().getReaderID(read); } /** @@ -339,7 +355,7 @@ public abstract class AbstractGenomeAnalysisEngine { * @return The source filename for this read. */ public File getSourceFileForReaderID(final SAMReaderID id) { - return getDataSource().getSAMFile(id); + return getReadsDataSource().getSAMFile(id); } /** @@ -351,7 +367,7 @@ public abstract class AbstractGenomeAnalysisEngine { * @return Sets of samples in the merged input SAM stream, grouped by readers */ public List> getSamplesByReaders() { - List readers = getDataSource().getReaderIDs(); + List readers = getReadsDataSource().getReaderIDs(); List> sample_sets = new ArrayList>(readers.size()); @@ -360,7 +376,7 @@ public abstract class AbstractGenomeAnalysisEngine { Set samples = new HashSet(1); sample_sets.add(samples); - for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) { + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { samples.add(g.getSample()); } } @@ -380,7 +396,7 @@ public abstract class AbstractGenomeAnalysisEngine { public List> getLibrariesByReaders() { - List readers = getDataSource().getReaderIDs(); + List readers = getReadsDataSource().getReaderIDs(); List> lib_sets = new ArrayList>(readers.size()); @@ -389,7 +405,7 @@ public abstract class AbstractGenomeAnalysisEngine { Set libs = new HashSet(2); lib_sets.add(libs); - for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) { + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { libs.add(g.getLibrary()); } } @@ -406,22 +422,22 @@ public abstract class AbstractGenomeAnalysisEngine { public Map> getFileToReadGroupIdMapping() { // populate the file -> read group mapping Map> fileToReadGroupIdMap = new HashMap>(); - for (SAMReaderID id: getDataSource().getReaderIDs()) { + for (SAMReaderID id: getReadsDataSource().getReaderIDs()) { Set readGroups = new HashSet(5); - for (SAMReadGroupRecord g : getDataSource().getHeader(id).getReadGroups()) { - if (getDataSource().hasReadGroupCollisions()) { + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(id).getReadGroups()) { + if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes. // If there were, use the SamFileHeaderMerger to translate from the // original read group id to the read group id in the merged stream - readGroups.add(getDataSource().getReadGroupId(id,g.getReadGroupId())); + readGroups.add(getReadsDataSource().getReadGroupId(id,g.getReadGroupId())); } else { // otherwise, pass through the unmapped read groups since this is what Picard does as well readGroups.add(g.getReadGroupId()); } } - fileToReadGroupIdMap.put(getDataSource().getSAMFile(id),readGroups); + fileToReadGroupIdMap.put(getReadsDataSource().getSAMFile(id),readGroups); } return fileToReadGroupIdMap; @@ -440,7 +456,7 @@ public abstract class AbstractGenomeAnalysisEngine { public List> getMergedReadGroupsByReaders() { - List readers = getDataSource().getReaderIDs(); + List readers = getReadsDataSource().getReaderIDs(); List> rg_sets = new ArrayList>(readers.size()); @@ -449,11 +465,11 @@ public abstract class AbstractGenomeAnalysisEngine { Set groups = new HashSet(5); rg_sets.add(groups); - for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) { - if (getDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so: + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { + if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so: // use HeaderMerger to translate original read group id from the reader into the read group id in the // merged stream, and save that remapped read group id to associate it with specific reader - groups.add(getDataSource().getReadGroupId(r, g.getReadGroupId())); + groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId())); } else { // otherwise, pass through the unmapped read groups since this is what Picard does as well groups.add(g.getReadGroupId()); @@ -533,29 +549,17 @@ public abstract class AbstractGenomeAnalysisEngine { } - - /** - * Convenience function that binds RODs using the old-style command line parser to the new style list for - * a uniform processing. - * - * @param name the name of the rod - * @param type its type - * @param file the file to load the rod from - */ - private void bindConvenienceRods(final String name, final String type, final String file) { - argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file})); - } - /** * Gets a data source for the given set of reads. * * @return A data source for the given set of reads. */ - private SAMDataSource createReadsDataSource() { + private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser) { DownsamplingMethod method = getDownsamplingMethod(); return new SAMDataSource( unpackBAMFileList(argCollection.samFiles), + genomeLocParser, argCollection.useOriginalBaseQualities, argCollection.strictnessLevel, argCollection.readBufferSize, @@ -574,7 +578,7 @@ public abstract class AbstractGenomeAnalysisEngine { */ private ReferenceDataSource openReferenceSequenceFile(File refFile) { ReferenceDataSource ref = new ReferenceDataSource(refFile); - GenomeLocParser.setupRefContigOrdering(ref.getReference()); + genomeLocParser = new GenomeLocParser(ref.getReference()); return ref; } @@ -587,7 +591,7 @@ public abstract class AbstractGenomeAnalysisEngine { private List getReferenceOrderedDataSources(List rods) { List dataSources = new ArrayList(); for (RMDTrack rod : rods) - dataSources.add(new ReferenceOrderedDataSource(rod, flashbackData())); + dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,rod,flashbackData())); return dataSources; } @@ -614,10 +618,12 @@ public abstract class AbstractGenomeAnalysisEngine { * * @return the reads data source */ - public SAMDataSource getDataSource() { + public SAMDataSource getReadsDataSource() { return this.readsDataSource; } + + /** * Sets the collection of GATK main application arguments. * diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 76ec2ed35..07eed89c7 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -165,7 +165,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { throw new UserException.CommandLineException("Read-based traversals require a reference file but none was given"); } - return MicroScheduler.create(this,my_walker,this.getDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads); + return MicroScheduler.create(this,my_walker,this.getReadsDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads); } @Override @@ -258,7 +258,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { */ protected ShardStrategy getShardStrategy(ReferenceSequenceFile drivingDataSource) { GenomeLocSortedSet intervals = this.getIntervals(); - SAMDataSource readsDataSource = this.getDataSource(); + SAMDataSource readsDataSource = this.getReadsDataSource(); ValidationExclusion exclusions = (readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null); ReferenceDataSource referenceDataSource = this.getReferenceDataSource(); // Use monolithic sharding if no index is present. Monolithic sharding is always required for the original @@ -286,7 +286,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { else { region = new ArrayList(); for(SAMSequenceRecord sequenceRecord: drivingDataSource.getSequenceDictionary().getSequences()) - region.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength())); + region.add(getGenomeLocParser().createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength())); } return new MonolithicShardStrategy(readsDataSource,shardType,region); @@ -309,13 +309,14 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, drivingDataSource.getSequenceDictionary(), SHARD_SIZE, + getGenomeLocParser(), intervals); } else shardStrategy = ShardStrategyFactory.shatter(readsDataSource, referenceDataSource.getReference(), ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, drivingDataSource.getSequenceDictionary(), - SHARD_SIZE); + SHARD_SIZE,getGenomeLocParser()); } else if (walker instanceof ReadWalker || walker instanceof DuplicateWalker) { shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL; @@ -326,13 +327,15 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { shardType, drivingDataSource.getSequenceDictionary(), SHARD_SIZE, + getGenomeLocParser(), intervals); } else { shardStrategy = ShardStrategyFactory.shatter(readsDataSource, referenceDataSource.getReference(), shardType, drivingDataSource.getSequenceDictionary(), - SHARD_SIZE); + SHARD_SIZE, + getGenomeLocParser()); } } else if (walker instanceof ReadPairWalker) { if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname) @@ -344,7 +347,8 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { referenceDataSource.getReference(), ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, drivingDataSource.getSequenceDictionary(), - SHARD_SIZE); + SHARD_SIZE, + getGenomeLocParser()); } else throw new ReviewedStingException("Unable to support walker of type" + walker.getClass().getName()); diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index 96d49fc2e..830869fdb 100644 --- a/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.contexts; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.BaseUtils; @@ -41,6 +42,11 @@ import net.sf.samtools.util.StringUtil; public class ReferenceContext { final public static boolean UPPERCASE_REFERENCE = true; + /** + * Facilitates creation of new GenomeLocs. + */ + private GenomeLocParser genomeLocParser; + /** * The locus. */ @@ -101,18 +107,18 @@ public class ReferenceContext { * @param locus locus of interest. * @param base reference base at that locus. */ - public ReferenceContext( GenomeLoc locus, byte base ) { - this( locus, locus, new ForwardingProvider(base) ); + public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, byte base ) { + this( genomeLocParser, locus, locus, new ForwardingProvider(base) ); } - public ReferenceContext( GenomeLoc locus, GenomeLoc window, byte[] bases ) { - this( locus, window, new ForwardingProvider(bases) ); + public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, byte[] bases ) { + this( genomeLocParser, locus, window, new ForwardingProvider(bases) ); } - public ReferenceContext( GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) { + public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) { // if( !window.containsP(locus) ) // throw new StingException("Invalid locus or window; window does not contain locus"); - + this.genomeLocParser = genomeLocParser; this.locus = locus; this.window = window; this.basesProvider = basesProvider; @@ -125,6 +131,10 @@ public class ReferenceContext { } } + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * The locus currently being examined. * @return The current locus. diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index e5e029d74..c54f10aaa 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -219,8 +219,8 @@ public class VariantContextUtils { * @param exp expression * @return true if there is a match */ - public static boolean match(VariantContext vc, JexlVCMatchExp exp) { - return match(vc,Arrays.asList(exp)).get(exp); + public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, JexlVCMatchExp exp) { + return match(genomeLocParser,vc,Arrays.asList(exp)).get(exp); } /** @@ -233,8 +233,8 @@ public class VariantContextUtils { * @param exps expressions * @return true if there is a match */ - public static Map match(VariantContext vc, Collection exps) { - return new JEXLMap(exps,vc); + public static Map match(GenomeLocParser genomeLocParser,VariantContext vc, Collection exps) { + return new JEXLMap(genomeLocParser,exps,vc); } @@ -245,8 +245,8 @@ public class VariantContextUtils { * @param exp expression * @return true if there is a match */ - public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) { - return match(vc,g,Arrays.asList(exp)).get(exp); + public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, JexlVCMatchExp exp) { + return match(genomeLocParser,vc,g,Arrays.asList(exp)).get(exp); } /** @@ -260,8 +260,8 @@ public class VariantContextUtils { * @param exps expressions * @return true if there is a match */ - public static Map match(VariantContext vc, Genotype g, Collection exps) { - return new JEXLMap(exps,vc,g); + public static Map match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, Collection exps) { + return new JEXLMap(genomeLocParser,exps,vc,g); } @@ -306,8 +306,8 @@ public class VariantContextUtils { UNION, INTERSECT } - public static VariantContext simpleMerge(Collection unsortedVCs, byte refBase) { - return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { + return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); } @@ -322,14 +322,14 @@ public class VariantContextUtils { * @param genotypeMergeOptions * @return */ - public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false); + return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false); } - public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, boolean filteredAreUncalled ) { @@ -357,7 +357,7 @@ public class VariantContextUtils { // establish the baseline info from the first VC VariantContext first = VCs.get(0); String name = first.getSource(); - GenomeLoc loc = getLocation(first); + GenomeLoc loc = getLocation(genomeLocParser,first); Set alleles = new TreeSet(); Map genotypes = new TreeMap(); @@ -380,8 +380,8 @@ public class VariantContextUtils { if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) ) throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); - if ( getLocation(vc).size() > loc.size() ) - loc = getLocation(vc); // get the longest location + if ( getLocation(genomeLocParser,vc).size() > loc.size() ) + loc = getLocation(genomeLocParser,vc); // get the longest location nFiltered += vc.isFiltered() ? 1 : 0; nVariant += vc.isVariant() ? 1 : 0; @@ -753,13 +753,13 @@ public class VariantContextUtils { * @param vc the variant context * @return the genomeLoc */ - public static final GenomeLoc getLocation(VariantContext vc) { - return GenomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd()); + public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) { + return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd()); } // NOTE: returns null if vc1 and vc2 are not mergeable into a single MNP record - public static VariantContext mergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { - if (!mergeIntoMNPvalidationCheck(vc1, vc2)) + public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { + if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2)) return null; // Check that it's logically possible to merge the VCs, and that there's a point in doing so (e.g., annotations could be changed): @@ -974,9 +974,9 @@ public class VariantContextUtils { } } - private static boolean mergeIntoMNPvalidationCheck(VariantContext vc1, VariantContext vc2) { - GenomeLoc loc1 = VariantContextUtils.getLocation(vc1); - GenomeLoc loc2 = VariantContextUtils.getLocation(vc2); + private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2) { + GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser,vc1); + GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser,vc2); if (!loc1.onSameContig(loc2)) throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome"); diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java index ee9ec24ec..f36cf3bf6 100644 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java @@ -27,6 +27,7 @@ import org.apache.commons.jexl2.JexlContext; import org.apache.commons.jexl2.MapContext; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -49,6 +50,7 @@ import java.util.*; */ class VariantJEXLContext implements JexlContext { + private GenomeLocParser genomeLocParser; // our stored variant context private VariantContext vc; @@ -73,7 +75,8 @@ class VariantJEXLContext implements JexlContext { x.put("homVarCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHomVarCount(); }}); } - public VariantJEXLContext(VariantContext vc) { + public VariantJEXLContext(GenomeLocParser genomeLocParser,VariantContext vc) { + this.genomeLocParser = genomeLocParser; this.vc = vc; } @@ -119,6 +122,7 @@ class VariantJEXLContext implements JexlContext { */ class JEXLMap implements Map { + private final GenomeLocParser genomeLocParser; // our variant context and/or Genotype private final VariantContext vc; private final Genotype g; @@ -130,18 +134,19 @@ class JEXLMap implements Map { private Map jexl; - public JEXLMap(Collection jexlCollection, VariantContext vc, Genotype g) { + public JEXLMap(GenomeLocParser genomeLocParser,Collection jexlCollection, VariantContext vc, Genotype g) { + this.genomeLocParser = genomeLocParser; this.vc = vc; this.g = g; initialize(jexlCollection); } - public JEXLMap(Collection jexlCollection, VariantContext vc) { - this(jexlCollection, vc, null); + public JEXLMap(GenomeLocParser genomeLocParser,Collection jexlCollection, VariantContext vc) { + this(genomeLocParser,jexlCollection, vc, null); } - public JEXLMap(Collection jexlCollection, Genotype g) { - this(jexlCollection, null, g); + public JEXLMap(GenomeLocParser genomeLocParser,Collection jexlCollection, Genotype g) { + this(genomeLocParser,jexlCollection, null, g); } private void initialize(Collection jexlCollection) { @@ -159,14 +164,14 @@ class JEXLMap implements Map { private void createContext() { if ( g == null ) { // todo -- remove dependancy on g to the entire system - jContext = new VariantJEXLContext(vc); + jContext = new VariantJEXLContext(genomeLocParser,vc); } else { Map infoMap = new HashMap(); if ( vc != null ) { // create a mapping of what we know about the variant context, its Chromosome, positions, etc. - infoMap.put("CHROM", VariantContextUtils.getLocation(vc).getContig()); - infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(vc).getStart())); + infoMap.put("CHROM", VariantContextUtils.getLocation(genomeLocParser,vc).getContig()); + infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(genomeLocParser,vc).getStart())); infoMap.put("TYPE", vc.getType().toString()); infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual())); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index 2aad7242e..6d7631a13 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; /** * User: hanna @@ -47,7 +48,7 @@ public class AllLocusView extends LocusView { public AllLocusView(LocusShardDataProvider provider) { super( provider ); // Seed the state tracking members with the first possible seek position and the first possible locus context. - locusIterator = new GenomeLocusIterator(provider.getLocus()); + locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus()); if( locusIterator.hasNext() ) { // cache next position and next alignment context nextPosition = locusIterator.next(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index eff273d0e..330a9e4f7 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -97,9 +97,9 @@ public class LocusReferenceView extends ReferenceView { } if(bounds != null) { - long expandedStart = getWindowStart( bounds ); - long expandedStop = getWindowStop( bounds ); - initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); + int expandedStart = getWindowStart( bounds ); + int expandedStop = getWindowStop( bounds ); + initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); } } @@ -123,12 +123,12 @@ public class LocusReferenceView extends ReferenceView { if ( loc.getContigIndex() != bounds.getContigIndex() ) throw new ReviewedStingException("Illegal attempt to expand reference view bounds to accommodate location on a different contig."); - bounds = GenomeLocParser.createGenomeLoc(bounds.getContigIndex(), + bounds = genomeLocParser.createGenomeLoc(bounds.getContig(), Math.min(bounds.getStart(),loc.getStart()), Math.max(bounds.getStop(),loc.getStop())); - long expandedStart = getWindowStart( bounds ); - long expandedStop = getWindowStop( bounds ); - initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); + int expandedStart = getWindowStart( bounds ); + int expandedStop = getWindowStop( bounds ); + initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); } /** @@ -137,8 +137,8 @@ public class LocusReferenceView extends ReferenceView { */ private void initializeBounds(LocusShardDataProvider provider) { if(provider.getLocus() != null) { - long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength(); - bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(), + int sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength(); + bounds = genomeLocParser.createGenomeLoc(provider.getLocus().getContig(), Math.max(provider.getLocus().getStart(),1), Math.min(provider.getLocus().getStop(),sequenceLength)); } @@ -155,10 +155,10 @@ public class LocusReferenceView extends ReferenceView { } protected GenomeLoc trimToBounds(GenomeLoc l) { - long expandedStart = getWindowStart( bounds ); - long expandedStop = getWindowStop( bounds ); - if ( l.getStart() < expandedStart ) l = GenomeLocParser.setStart(l, expandedStart); - if ( l.getStop() > expandedStop ) l = GenomeLocParser.setStop(l, expandedStop); + int expandedStart = getWindowStart( bounds ); + int expandedStop = getWindowStop( bounds ); + if ( l.getStart() < expandedStart ) l = genomeLocParser.setStart(l, expandedStart); + if ( l.getStop() > expandedStop ) l = genomeLocParser.setStop(l, expandedStop); return l; } @@ -186,7 +186,7 @@ public class LocusReferenceView extends ReferenceView { public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) { //validateLocation( genomeLoc ); - GenomeLoc window = GenomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); + GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); int refStart = -1; if (bounds != null) { @@ -200,7 +200,7 @@ public class LocusReferenceView extends ReferenceView { } int len = (int)window.size(); - return new ReferenceContext( genomeLoc, window, new Provider(refStart, len)); + return new ReferenceContext( genomeLocParser, genomeLoc, window, new Provider(refStart, len)); } /** @@ -228,7 +228,7 @@ public class LocusReferenceView extends ReferenceView { * @param locus The locus to expand. * @return The expanded window. */ - private long getWindowStart( GenomeLoc locus ) { + private int getWindowStart( GenomeLoc locus ) { // If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can. if(locus.getStart() < 1) return 1; // if(locus.getStart() < 1) return locus.getStart(); @@ -240,9 +240,9 @@ public class LocusReferenceView extends ReferenceView { * @param locus The locus to expand. * @return The expanded window. */ - private long getWindowStop( GenomeLoc locus ) { + private int getWindowStop( GenomeLoc locus ) { // If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can. - long sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength(); + int sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength(); if(locus.getStop() > sequenceLength) return sequenceLength; return Math.min( locus.getStop() + windowStop, sequenceLength ); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 0fad873ce..75d03856b 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.ReadProperties; import java.util.Collection; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * Presents data sharded by locus to the traversal engine. @@ -22,6 +23,11 @@ public class LocusShardDataProvider extends ShardDataProvider { */ private final ReadProperties sourceInfo; + /** + * The parser, used to create and build new GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + /** * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs(). */ @@ -37,9 +43,10 @@ public class LocusShardDataProvider extends ShardDataProvider { * @param shard The chunk of data over which traversals happen. * @param reference A getter for a section of the reference. */ - public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { - super(shard,reference,rods); + public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { + super(shard,genomeLocParser,reference,rods); this.sourceInfo = sourceInfo; + this.genomeLocParser = genomeLocParser; this.locus = locus; this.locusIterator = locusIterator; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 9814217eb..84b5a5463 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Arrays; import java.util.Collection; @@ -33,6 +34,11 @@ public abstract class LocusView extends LocusIterator implements View { */ protected GenomeLoc locus; + /** + * The GenomeLocParser, used to create new genome locs. + */ + protected GenomeLocParser genomeLocParser; + /** * Source info for this view. Informs the class about downsampling requirements. */ @@ -53,6 +59,7 @@ public abstract class LocusView extends LocusIterator implements View { this.locus = provider.getLocus(); this.sourceInfo = provider.getSourceInfo(); + this.genomeLocParser = provider.getGenomeLocParser(); this.loci = provider.getLocusIterator(); seedNextLocus(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index 034472676..f4adcefb9 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -76,7 +76,7 @@ public class ReadBasedReferenceOrderedView implements View { /** stores a window of data, dropping RODs if we've passed the new reads start point. */ class WindowedData { // the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope - private final TreeMap mapping = new TreeMap(); + private final TreeMap mapping = new TreeMap(); // our current location from the last read we processed private GenomeLoc currentLoc; @@ -109,16 +109,16 @@ class WindowedData { */ private void getStates(ShardDataProvider provider, SAMRecord rec) { - long stop = Integer.MAX_VALUE; + int stop = Integer.MAX_VALUE; // figure out the appropriate alignment stop if (provider.hasReference()) { stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength(); } - + // calculate the range of positions we need to look at - GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), - rec.getAlignmentStart(), - stop); + GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(), + rec.getAlignmentStart(), + stop); states = new ArrayList(); if (provider != null && provider.getReferenceOrderedData() != null) for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) @@ -144,7 +144,7 @@ class WindowedData { */ public ReadMetaDataTracker getTracker(SAMRecord rec) { updatePosition(rec); - return new ReadMetaDataTracker(rec, mapping); + return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping); } /** @@ -154,7 +154,7 @@ class WindowedData { */ private void updatePosition(SAMRecord rec) { if (states == null) getStates(this.provider, rec); - currentLoc = GenomeLocParser.createGenomeLoc(rec); + currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec); // flush the queue looking for records we've passed over while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart()) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java index 98582ce41..d2c097f5d 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java @@ -67,10 +67,10 @@ public class ReadReferenceView extends ReferenceView { } public ReferenceContext getReferenceContext( SAMRecord read ) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(read); + GenomeLoc loc = genomeLocParser.createGenomeLoc(read); // byte[] bases = super.getReferenceBases(loc); // return new ReferenceContext( loc, loc, bases ); - return new ReferenceContext( loc, loc, getReferenceBasesProvider(loc) ); + return new ReferenceContext( genomeLocParser, loc, loc, getReferenceBasesProvider(loc) ); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java index b6490794b..04120d4e5 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import java.util.Collection; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * Present data sharded by read to a traversal engine. @@ -26,8 +27,8 @@ public class ReadShardDataProvider extends ShardDataProvider { * @param shard The chunk of data over which traversals happen. * @param reference A getter for a section of the reference. */ - public ReadShardDataProvider(Shard shard, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection rods) { - super(shard,reference,rods); + public ReadShardDataProvider(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection rods) { + super(shard,genomeLocParser,reference,rods); this.reads = reads; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java index 43d122ffb..d6c938f36 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java @@ -28,6 +28,11 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; * A view into the reference backing this shard. */ public class ReferenceView implements View { + /** + * The parser, used to create and parse GenomeLocs. + */ + protected final GenomeLocParser genomeLocParser; + /** * The source of reference data. */ @@ -38,6 +43,7 @@ public class ReferenceView implements View { * @param provider */ public ReferenceView( ShardDataProvider provider ) { + this.genomeLocParser = provider.getGenomeLocParser(); this.reference = provider.getReference(); } @@ -68,7 +74,7 @@ public class ReferenceView implements View { } protected byte[] getReferenceBases( SAMRecord read ) { - return getReferenceBases(GenomeLocParser.createGenomeLoc(read)); + return getReferenceBases(genomeLocParser.createGenomeLoc(read)); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 186a7d335..8a67a5db7 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -80,7 +80,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { // the iterator to immediately before it, so that it can be added to the merging iterator primed for // next() to return the first real ROD in this shard LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard()); - it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1)); + it.seekForward(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart()-1)); states.add(new ReferenceOrderedDataState(dataSource,it)); @@ -128,7 +128,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { tracker = createTracker(allTracksHere); GenomeLoc rodSite = datum.getLocation(); - GenomeLoc site = GenomeLocParser.createGenomeLoc( rodSite.getContigIndex(), rodSite.getStart(), rodSite.getStart()); + GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart()); if ( DEBUG ) System.out.printf("rodLocusView.next() is at %s%n", site); @@ -167,7 +167,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { */ private long getSkippedBases( GenomeLoc currentPos ) { // the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped - Long compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop(); + Integer compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop(); long skippedBases = currentPos.getStart() - compStop - 1; if ( skippedBases < -1 ) { // minus 1 value is ok @@ -182,7 +182,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { * @return */ public GenomeLoc getLocOneBeyondShard() { - return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStop()+1); + return genomeLocParser.createGenomeLoc(locus.getContig(),locus.getStop()+1); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index cb912ccf9..e6f6c9879 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; @@ -37,6 +38,11 @@ public abstract class ShardDataProvider { */ private final Shard shard; + /** + * The parser, used to create and build new GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + /** * Provider of reference data for this particular shard. */ @@ -47,6 +53,14 @@ public abstract class ShardDataProvider { */ private final Collection referenceOrderedData; + /** + * Returns the GenomeLocParser associated with this traversal. + * @return The associated parser. + */ + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * Retrieves the shard associated with this data provider. * @return The shard associated with this data provider. @@ -86,8 +100,9 @@ public abstract class ShardDataProvider { * @param shard The chunk of data over which traversals happen. * @param reference A getter for a section of the reference. */ - public ShardDataProvider(Shard shard,IndexedFastaSequenceFile reference,Collection rods) { + public ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser,IndexedFastaSequenceFile reference,Collection rods) { this.shard = shard; + this.genomeLocParser = genomeLocParser; this.reference = reference; this.referenceOrderedData = rods; } @@ -96,8 +111,8 @@ public abstract class ShardDataProvider { * Skeletal, package protected constructor for unit tests which require a ShardDataProvider. * @param shard the shard */ - ShardDataProvider(Shard shard) { - this(shard,null,null); + ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser) { + this(shard,genomeLocParser,null,null); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java index 0c4c08480..5cb41182c 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java @@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; public class IntervalSharder { private static Logger logger = Logger.getLogger(IntervalSharder.class); - public static Iterator shardIntervals(final SAMDataSource dataSource, final List loci) { + public static Iterator shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) { return new FilePointerIterator(dataSource,loci); } @@ -55,11 +55,13 @@ public class IntervalSharder { */ private static class FilePointerIterator implements Iterator { final SAMDataSource dataSource; + final GenomeLocSortedSet loci; final PeekableIterator locusIterator; final Queue cachedFilePointers = new LinkedList(); - public FilePointerIterator(final SAMDataSource dataSource, final List loci) { + public FilePointerIterator(final SAMDataSource dataSource, final GenomeLocSortedSet loci) { this.dataSource = dataSource; + this.loci = loci; locusIterator = new PeekableIterator(loci.iterator()); advance(); } @@ -82,7 +84,7 @@ public class IntervalSharder { } private void advance() { - List nextBatch = new ArrayList(); + GenomeLocSortedSet nextBatch = new GenomeLocSortedSet(loci.getGenomeLocParser()); String contig = null; while(locusIterator.hasNext() && nextBatch.isEmpty()) { @@ -99,7 +101,7 @@ public class IntervalSharder { } } - private static List shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final List loci) { + private static List shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final GenomeLocSortedSet loci) { // Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin. List filePointers = new ArrayList(); FilePointer lastFilePointer = null; @@ -171,7 +173,7 @@ public class IntervalSharder { final int regionStop = Math.min(locationStop,binStart-1); - GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop); + GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop); lastFilePointer = new FilePointer(subset); locationStart = regionStop + 1; @@ -184,7 +186,7 @@ public class IntervalSharder { lastBAMOverlap = null; } - GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,locationStop); + GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,locationStop); filePointers.add(new FilePointer(subset)); locationStart = locationStop + 1; @@ -195,7 +197,7 @@ public class IntervalSharder { // The start of the region overlaps the bin. Add the overlapping subset. final int regionStop = Math.min(locationStop,binStop); - lastFilePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop)); + lastFilePointer.addLocation(loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop)); locationStart = regionStop + 1; } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java index f5003ddb9..3313b992e 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java @@ -58,14 +58,14 @@ public class LocusShardStrategy implements ShardStrategy { * @param reads Data source from which to load index data. * @param locations List of locations for which to load data. */ - LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocSortedSet locations) { + LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) { this.reads = reads; if(!reads.isEmpty()) { - List intervals; + GenomeLocSortedSet intervals; if(locations == null) { // If no locations were passed in, shard the entire BAM file. SAMFileHeader header = reads.getHeader(); - intervals = new ArrayList(); + intervals = new GenomeLocSortedSet(genomeLocParser); for(SAMSequenceRecord readsSequenceRecord: header.getSequenceDictionary().getSequences()) { // Check this sequence against the reference sequence dictionary. @@ -73,12 +73,12 @@ public class LocusShardStrategy implements ShardStrategy { SAMSequenceRecord refSequenceRecord = reference.getSequenceDictionary().getSequence(readsSequenceRecord.getSequenceName()); if(refSequenceRecord != null) { final int length = Math.min(readsSequenceRecord.getSequenceLength(),refSequenceRecord.getSequenceLength()); - intervals.add(GenomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length)); + intervals.add(genomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length)); } } } else - intervals = locations.toList(); + intervals = locations; this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals); } @@ -89,15 +89,15 @@ public class LocusShardStrategy implements ShardStrategy { for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) { for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) { final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength()); - filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop))); + filePointers.add(new FilePointer(genomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop))); } } } else { for(GenomeLoc interval: locations) { while(interval.size() > maxShardSize) { - filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1))); - interval = GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop()); + filePointers.add(new FilePointer(locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1))); + interval = locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop()); } filePointers.add(new FilePointer(interval)); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java index 18517d1cd..f17441a35 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java @@ -90,7 +90,7 @@ public class ReadShardStrategy implements ShardStrategy { this.locations = locations; if(locations != null) - filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations.toList()); + filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations); else filePointerIterator = filePointers.iterator(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java index 2bc7a3207..9af08c22e 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import net.sf.samtools.SAMSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; @@ -50,8 +51,8 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return a shard strategy capable of dividing input data into shards. */ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) { - return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, -1L); + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser) { + return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, -1L); } /** @@ -64,10 +65,10 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return a shard strategy capable of dividing input data into shards. */ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) { + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, long limitByCount) { switch (strat) { case LOCUS_EXPERIMENTAL: - return new LocusShardStrategy(readsDataSource,referenceDataSource,null); + return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null); case READS_EXPERIMENTAL: return new ReadShardStrategy(readsDataSource,null); default: @@ -87,8 +88,8 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return a shard strategy capable of dividing input data into shards. */ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) { - return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, lst, -1l); + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst) { + return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, lst, -1l); } @@ -102,10 +103,10 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return A strategy for shattering this data. */ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) { + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst, long limitDataCount) { switch (strat) { case LOCUS_EXPERIMENTAL: - return new LocusShardStrategy(readsDataSource,referenceDataSource,lst); + return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst); case READS_EXPERIMENTAL: return new ReadShardStrategy(readsDataSource,lst); default: diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index b6373738a..c652f32cf 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.FeatureSource; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; @@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -47,12 +49,12 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * Create a new reference-ordered data source. * @param rod the reference ordered data */ - public ReferenceOrderedDataSource( RMDTrack rod, boolean flashbackData ) { + public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) { this.rod = rod; if (rod.supportsQuery()) - iteratorPool = new ReferenceOrderedQueryDataPool(new RMDTrackBuilder(),rod); + iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,genomeLocParser,new RMDTrackBuilder(),rod); else - iteratorPool = new ReferenceOrderedDataPool( rod, flashbackData ); + iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData ); } /** @@ -110,7 +112,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { class ReferenceOrderedDataPool extends ResourcePool { private final RMDTrack rod; boolean flashbackData = false; - public ReferenceOrderedDataPool( RMDTrack rod, boolean flashbackData ) { + public ReferenceOrderedDataPool( SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) { + super(sequenceDictionary,genomeLocParser); this.flashbackData = flashbackData; this.rod = rod; } @@ -121,7 +124,7 @@ class ReferenceOrderedDataPool extends ResourcePool resources ) { if(segment instanceof MappedStreamSegment) { - GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation(); + GenomeLoc position = ((MappedStreamSegment)segment).getLocation(); for( LocationAwareSeekableRODIterator RODIterator : resources ) { @@ -178,14 +181,14 @@ class ReferenceOrderedDataPool extends ResourcePool { - // the reference-ordered data itself. private final RMDTrack rod; // our tribble track builder private final RMDTrackBuilder builder; - public ReferenceOrderedQueryDataPool( RMDTrackBuilder builder, RMDTrack rod ) { + public ReferenceOrderedQueryDataPool( SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, RMDTrackBuilder builder, RMDTrack rod ) { + super(sequenceDictionary,genomeLocParser); this.rod = rod; this.builder = builder; // a little bit of a hack, but it saves us from re-reading the index from the file @@ -209,9 +212,9 @@ class ReferenceOrderedQueryDataPool extends ResourcePool { + /** + * Sequence dictionary. + */ + protected final SAMSequenceDictionary sequenceDictionary; + + /** + * Builder/parser for GenomeLocs. + */ + protected final GenomeLocParser genomeLocParser; + /** * All iterators of this reference-ordered data. */ @@ -41,6 +52,11 @@ abstract class ResourcePool { */ private Map resourceAssignments = new HashMap(); + protected ResourcePool(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser) { + this.sequenceDictionary = sequenceDictionary; + this.genomeLocParser = genomeLocParser; + } + /** * Get an iterator whose position is before the specified location. Create a new one if none exists. * @param segment Target position for the iterator. @@ -180,36 +196,11 @@ class MappedStreamSegment implements DataStreamSegment { * Retrieves the first location covered by a mapped stream segment. * @return Location of the first base in this segment. */ - public GenomeLoc getFirstLocation() { - return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStart()); + public GenomeLoc getLocation() { + return locus; } public MappedStreamSegment(GenomeLoc locus) { this.locus = locus; } } - -/** - * Models a position within the unmapped reads in a stream of GATK input data. - */ -class UnmappedStreamSegment implements DataStreamSegment { - /** - * Where does this region start, given 0 = the position of the first unmapped read. - */ - public final long position; - - /** - * How many reads wide is this region? This size is generally treated as an upper bound. - */ - public final long size; - - /** - * Create a new target location in an unmapped read stream. - * @param position The 0-based index into the unmapped reads. Position 0 represents the first unmapped read. - * @param size the size of the segment. - */ - public UnmappedStreamSegment( long position, long size ) { - this.position = position; - this.size = size; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 1c063230d..61f137fb4 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -43,6 +43,7 @@ import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -63,17 +64,22 @@ public class SAMDataSource implements SimpleDataSource { /** * Runtime metrics of reads filtered, etc. */ - protected final ReadMetrics readMetrics; + private final ReadMetrics readMetrics; + + /** + * Tools for parsing GenomeLocs, for verifying BAM ordering against general ordering. + */ + private final GenomeLocParser genomeLocParser; /** * Identifiers for the readers driving this data source. */ - protected final List readerIDs; + private final List readerIDs; /** * How strict are the readers driving this data source. */ - protected final SAMFileReader.ValidationStringency validationStringency; + private final SAMFileReader.ValidationStringency validationStringency; /** * How far along is each reader? @@ -113,9 +119,10 @@ public class SAMDataSource implements SimpleDataSource { * Create a new SAM data source given the supplied read metadata. * @param samFiles list of reads files. */ - public SAMDataSource(List samFiles) { + public SAMDataSource(List samFiles,GenomeLocParser genomeLocParser) { this( samFiles, + genomeLocParser, false, SAMFileReader.ValidationStringency.STRICT, null, @@ -145,6 +152,7 @@ public class SAMDataSource implements SimpleDataSource { */ public SAMDataSource( List samFiles, + GenomeLocParser genomeLocParser, boolean useOriginalBaseQualities, SAMFileReader.ValidationStringency strictness, Integer readBufferSize, @@ -155,6 +163,7 @@ public class SAMDataSource implements SimpleDataSource { boolean generateExtendedEvents ) { this.readMetrics = new ReadMetrics(); + this.genomeLocParser = genomeLocParser; readerIDs = samFiles; validationStringency = strictness; @@ -520,7 +529,7 @@ public class SAMDataSource implements SimpleDataSource { // unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification, // verify the read ordering by applying a sort order iterator if (!noValidationOfReadOrder && enableVerification) - wrappedIterator = new VerifyingSamIterator(wrappedIterator); + wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator); wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 1da0cdeed..4290b4960 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -53,9 +53,9 @@ public class LinearMicroScheduler extends MicroScheduler { // New experimental code for managing locus intervals. if(shard.getShardType() == Shard.ShardType.LOCUS) { LocusWalker lWalker = (LocusWalker)walker; - WindowMaker windowMaker = new WindowMaker(shard, getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards()); + WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards()); for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods); + ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); accumulator.accumulate(dataProvider,result); dataProvider.close(); @@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler { windowMaker.close(); } else { - ShardDataProvider dataProvider = new ReadShardDataProvider(shard,getReadIterator(shard),reference,rods); + ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); accumulator.accumulate(dataProvider,result); dataProvider.close(); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 74c5ff6f6..de7623de6 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -158,6 +158,12 @@ public abstract class MicroScheduler { traversalEngine.printOnTraversalDone(metrics); } + /** + * Gets the engine that created this microscheduler. + * @return The engine owning this microscheduler. + */ + public GenomeAnalysisEngine getEngine() { return engine; } + /** * Returns data source maintained by this scheduler * @return diff --git a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index 62788737c..cbd3e822d 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -61,11 +61,11 @@ public class ShardTraverser implements Callable { Object accumulator = walker.reduceInit(); LocusWalker lWalker = (LocusWalker)walker; - WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards()); + WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards()); ShardDataProvider dataProvider = null; for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods); + dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods); accumulator = traversalEngine.traverse( walker, dataProvider, accumulator ); dataProvider.close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index b8afd937b..5172079af 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; import net.sf.picard.util.PeekableIterator; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * Buffer shards of data which may or may not contain multiple loci into @@ -51,11 +52,11 @@ public class WindowMaker implements Iterable, I * @param intervals The set of intervals over which to traverse. * @param discards a filter at that indicates read position relative to some locus? */ - public WindowMaker(Shard shard, StingSAMIterator iterator, List intervals, List discards ) { + public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List intervals, List discards ) { this.sourceInfo = shard.getReadProperties(); this.readIterator = iterator; - LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,discards); + LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,discards); this.sourceIterator = new PeekableIterator(locusIterator); this.intervalIterator = intervals.size()>0 ? new PeekableIterator(intervals.iterator()) : null; diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java index a078d9d4e..bc45cc75b 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java @@ -22,6 +22,11 @@ import java.util.Iterator; * Iterates through all of the loci provided in the reference. */ public class GenomeLocusIterator implements Iterator { + /** + * Builds individual loci. + */ + private GenomeLocParser parser; + /** * The entire region over which we're iterating. */ @@ -38,9 +43,10 @@ public class GenomeLocusIterator implements Iterator { * @param completeLocus Data provider to use as a backing source. * Provider must have a reference (hasReference() == true). */ - public GenomeLocusIterator( GenomeLoc completeLocus ) { + public GenomeLocusIterator( GenomeLocParser parser, GenomeLoc completeLocus ) { + this.parser = parser; this.completeLocus = completeLocus; - this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart()); + this.currentLocus = parser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart()); } /** @@ -59,7 +65,7 @@ public class GenomeLocusIterator implements Iterator { if( !hasNext() ) throw new NoSuchElementException("No elements remaining in bounded reference region."); GenomeLoc toReturn = (GenomeLoc)currentLocus.clone(); - currentLocus = GenomeLocParser.incPos(currentLocus); + currentLocus = parser.incPos(currentLocus); return toReturn; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index f8e8cfe58..659884526 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -61,6 +61,11 @@ public class LocusIteratorByState extends LocusIterator { // ----------------------------------------------------------------------------------------------------------------- private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position + /** + * Used to create new GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + private final Collection sampleNames = new ArrayList(); private final ReadStateManager readStates; @@ -129,8 +134,8 @@ public class LocusIteratorByState extends LocusIterator { public int getGenomePosition() { return read.getAlignmentStart() + getGenomeOffset(); } - public GenomeLoc getLocation() { - return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); + public GenomeLoc getLocation(GenomeLocParser genomeLocParser) { + return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); } public CigarOperator getCurrentCigarOperator() { @@ -268,12 +273,13 @@ public class LocusIteratorByState extends LocusIterator { // constructors and other basic operations // // ----------------------------------------------------------------------------------------------------------------- - public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation ) { - this(samIterator, readInformation, NO_FILTERS); + public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser ) { + this(samIterator, readInformation, genomeLocParser, NO_FILTERS); } - public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, List filters ) { + public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, List filters ) { this.readInfo = readInformation; + this.genomeLocParser = genomeLocParser; this.filters = filters; // Aggregate all sample names. sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader())); @@ -310,7 +316,7 @@ public class LocusIteratorByState extends LocusIterator { } private GenomeLoc getLocation() { - return readStates.isEmpty() ? null : readStates.getFirst().getLocation(); + return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser); } // ----------------------------------------------------------------------------------------------------------------- @@ -354,7 +360,7 @@ public class LocusIteratorByState extends LocusIterator { SAMRecordState our1stState = readStates.getFirst(); // get current location on the reference and decrement it by 1: the indels we just stepped over // are associated with the *previous* reference base - GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1); + GenomeLoc loc = genomeLocParser.incPos(our1stState.getLocation(genomeLocParser),-1); boolean hasBeenSampled = false; for(String sampleName: sampleNames) { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java index 804055140..e5a561176 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java @@ -16,11 +16,13 @@ import java.util.Iterator; * To change this template use File | Settings | File Templates. */ public class VerifyingSamIterator implements StingSAMIterator { + private GenomeLocParser genomeLocParser; StingSAMIterator it; SAMRecord last = null; boolean checkOrderP = true; - public VerifyingSamIterator(StingSAMIterator it) { + public VerifyingSamIterator(GenomeLocParser genomeLocParser,StingSAMIterator it) { + this.genomeLocParser = genomeLocParser; this.it = it; } @@ -35,27 +37,19 @@ public class VerifyingSamIterator implements StingSAMIterator { return cur; } - /** - * If true, enables ordered checking of the reads in the file. By default this is enabled. - * @param checkP If true, sam records will be checked to insure they come in order - */ - public void setCheckOrderP( boolean checkP ) { - checkOrderP = checkP; - } - - public void verifyRecord( final SAMRecord last, final SAMRecord cur ) { + private void verifyRecord( final SAMRecord last, final SAMRecord cur ) { if ( checkOrderP && isOutOfOrder(last, cur) ) { this.last = null; throw new RuntimeIOException(String.format("Reads are out of order:%nlast:%n%s%ncurrent:%n%s%n", last.format(), cur.format()) ); } } - public static boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) { + private boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) { if ( last == null || cur.getReadUnmappedFlag() ) return false; else { - GenomeLoc lastLoc = GenomeLocParser.createGenomeLoc( last ); - GenomeLoc curLoc = GenomeLocParser.createGenomeLoc( cur ); + GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last ); + GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur ); return curLoc.compareTo(lastLoc) == -1; } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java index 7a497b9a9..090022269 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java @@ -43,10 +43,15 @@ import java.util.TreeMap; * a read-based meta data tracker */ public class ReadMetaDataTracker { + /** + * The parser, used to create new GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + private final SAMRecord record; // the buffer of positions and RODs we've stored - private final TreeMap mapping; + private final TreeMap mapping; /** * create a read meta data tracker, given the read and a queue of RODatum positions @@ -54,7 +59,8 @@ public class ReadMetaDataTracker { * @param record the read to create offset from * @param mapping the mapping of reference ordered datum */ - public ReadMetaDataTracker(SAMRecord record, TreeMap mapping) { + public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap mapping) { + this.genomeLocParser = genomeLocParser; this.record = record; this.mapping = mapping; } @@ -69,13 +75,13 @@ public class ReadMetaDataTracker { * * @return a mapping from the position in the read to the reference ordered datum */ - private Map> createReadAlignment(SAMRecord record, TreeMap queue, Class cl, String name) { + private Map> createReadAlignment(SAMRecord record, TreeMap queue, Class cl, String name) { if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified"); - Map> ret = new LinkedHashMap>(); - GenomeLoc location = GenomeLocParser.createGenomeLoc(record); + Map> ret = new LinkedHashMap>(); + GenomeLoc location = genomeLocParser.createGenomeLoc(record); int length = record.getReadLength(); - for (Long loc : queue.keySet()) { - Long position = loc - location.getStart(); + for (Integer loc : queue.keySet()) { + Integer position = loc - location.getStart(); if (position >= 0 && position < length) { Collection set; if (cl != null) @@ -95,11 +101,11 @@ public class ReadMetaDataTracker { * * @return a mapping from the position in the read to the reference ordered datum */ - private Map> createGenomeLocAlignment(SAMRecord record, TreeMap mapping, Class cl, String name) { - Map> ret = new LinkedHashMap>(); + private Map> createGenomeLocAlignment(SAMRecord record, TreeMap mapping, Class cl, String name) { + Map> ret = new LinkedHashMap>(); int start = record.getAlignmentStart(); int stop = record.getAlignmentEnd(); - for (Long location : mapping.keySet()) { + for (Integer location : mapping.keySet()) { if (location >= start && location <= stop) if (cl != null) ret.put(location, mapping.get(location).getSet(cl)); @@ -114,7 +120,7 @@ public class ReadMetaDataTracker { * * @return a mapping of read offset to ROD(s) */ - public Map> getReadOffsetMapping() { + public Map> getReadOffsetMapping() { return createReadAlignment(record, mapping, null, null); } @@ -123,7 +129,7 @@ public class ReadMetaDataTracker { * * @return a mapping of genome loc position to ROD(s) */ - public Map> getContigOffsetMapping() { + public Map> getContigOffsetMapping() { return createGenomeLocAlignment(record, mapping, null, null); } @@ -132,7 +138,7 @@ public class ReadMetaDataTracker { * * @return a mapping of read offset to ROD(s) */ - public Map> getReadOffsetMapping(String name) { + public Map> getReadOffsetMapping(String name) { return createReadAlignment(record, mapping, null, name); } @@ -141,7 +147,7 @@ public class ReadMetaDataTracker { * * @return a mapping of genome loc position to ROD(s) */ - public Map> getContigOffsetMapping(String name) { + public Map> getContigOffsetMapping(String name) { return createGenomeLocAlignment(record, mapping, null, name); } @@ -150,7 +156,7 @@ public class ReadMetaDataTracker { * * @return a mapping of read offset to ROD(s) */ - public Map> getReadOffsetMapping(Class cl) { + public Map> getReadOffsetMapping(Class cl) { return createReadAlignment(record, mapping, cl, null); } @@ -159,7 +165,7 @@ public class ReadMetaDataTracker { * * @return a mapping of genome loc position to ROD(s) */ - public Map> getContigOffsetMapping(Class cl) { + public Map> getContigOffsetMapping(Class cl) { return createGenomeLocAlignment(record, mapping, cl, null); } } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTestUtils.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java similarity index 58% rename from java/test/org/broadinstitute/sting/utils/GenomeLocParserTestUtils.java rename to java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java index 89b527a38..b4427c228 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTestUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java @@ -22,18 +22,21 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.refdata; + +import org.broad.tribble.FeatureCodec; +import org.broadinstitute.sting.utils.GenomeLocParser; /** - * A suite of utilities for working with the GenomeLocParser - * in the context of the sequence dictionary. + * An interface marking that a given Tribble feature/codec is actually dependent on context within the + * reference, rather than having a dependency only on the contig, start, and stop of the given feature. + * A HACK. Tribble should contain all the information in needs to decode the unqualified position of + * a feature. */ -public class GenomeLocParserTestUtils { +public interface ReferenceDependentFeatureCodec extends FeatureCodec { /** - * Clear out the sequence dictionary associated with - * the genomeloc creator. + * Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features. + * @param genomeLocParser The parser to supply. */ - public static void clearSequenceDictionary() { - GenomeLocParser.clearRefContigOrdering(); - } + public void setGenomeLocParser(GenomeLocParser genomeLocParser); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index c995f53bc..96f78ad09 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.refdata; +import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; @@ -38,21 +39,26 @@ import java.util.List; * To change this template use File | Settings | File Templates. */ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { + /** + * The parser, used to construct new genome locs. + */ + private final GenomeLocParser parser; + private PushbackIterator it; List records = null; // here we will keep a pile of records overlaping with current position; when we iterate // and step out of record's scope, we purge it from the list String name = null; // name of the ROD track wrapped by this iterator. Will be pulled from underlying iterator. - long curr_position = 0; // where the iterator is currently positioned on the genome - long max_position = 0; // the rightmost stop position of currently loaded records - int curr_contig = -1; // what contig the iterator is currently on + int curr_position = 0; // where the iterator is currently positioned on the genome + int max_position = 0; // the rightmost stop position of currently loaded records + String curr_contig = null; // what contig the iterator is currently on boolean next_is_allowed = true; // see discussion below. next() is illegal after seek-forward queries of length > 1 // the stop position of the last query. We can query only in forward direction ("seek forward"); // it is not only the start position of every successive query that can not be before the start // of the previous one (curr_start), but it is also illegal for a query interval to *end* before // the end of previous query, otherwise we can end up in an inconsistent state - long curr_query_end = -1; + int curr_query_end = -1; // EXAMPLE of inconsistency curr_query_end guards against: // record 1 record 2 @@ -80,7 +86,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1, // but re-enables next() again after a length-1 query. - public SeekableRODIterator(CloseableIterator it) { + public SeekableRODIterator(SAMSequenceDictionary dictionary,GenomeLocParser parser,CloseableIterator it) { + this.parser = parser; this.it = new PushbackIterator(it); records = new LinkedList(); // the following is a trick: we would like the iterator to know the actual name assigned to @@ -91,6 +98,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { GATKFeature r = null; if (this.it.hasNext()) r = this.it.element(); name = (r==null?null:r.getName()); + + curr_contig = dictionary.getSequence(0).getSequenceName(); } /** @@ -111,14 +120,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // Returns point location (i.e. genome loc of length 1) on the reference, to which this iterator will advance // upon next call to next(). public GenomeLoc peekNextLocation() { - if ( curr_position + 1 <= max_position ) return GenomeLocParser.createGenomeLoc(curr_contig,curr_position+1); + if ( curr_position + 1 <= max_position ) return parser.createGenomeLoc(curr_contig,curr_position+1); // sorry, next reference position is not covered by the RODs we are currently holding. In this case, // the location we will jump to upon next call to next() is the start of the next ROD record that we did // not read yet: if ( it.hasNext() ) { GATKFeature r = it.element(); // peek, do not load! - return GenomeLocParser.createGenomeLoc(r.getLocation().getContigIndex(),r.getLocation().getStart()); + return parser.createGenomeLoc(r.getLocation().getContig(),r.getLocation().getStart()); } return null; // underlying iterator has no more records, there is no next location! } @@ -147,7 +156,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { records.clear(); GATKFeature r = it.next(); // if hasNext() previously returned true, we are guaranteed that this call to reader.next() is safe records.add( r ); - curr_contig = r.getLocation().getContigIndex(); + curr_contig = r.getLocation().getContig(); curr_position = r.getLocation().getStart(); max_position = r.getLocation().getStop(); } @@ -163,11 +172,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { it.next(); continue; } - int that_contig = r.getLocation().getContigIndex(); - if ( curr_contig > that_contig ) + + GenomeLoc currentContig = parser.createOverEntireContig(curr_contig); + GenomeLoc thatContig = r.getLocation(); + + if ( currentContig.isPast(thatContig) ) throw new UserException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() + " occurs out of order in track " + r.getName() ); - if ( curr_contig < that_contig ) break; // next record is on a higher contig, we do not need it yet... + if ( currentContig.isBefore(thatContig) ) break; // next record is on a higher contig, we do not need it yet... if ( r.getLocation().getStart() < curr_position ) throw new UserException("LocationAwareSeekableRODIterator: track "+r.getName() + @@ -177,7 +189,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { r = it.next(); // we got here only if we do need next record, time to load it for real - long stop = r.getLocation().getStop(); + int stop = r.getLocation().getStop(); if ( stop < curr_position ) throw new ReviewedStingException("DEBUG: encountered contig that should have been loaded earlier"); // this should never happen if ( stop > max_position ) max_position = stop; // max_position keeps the rightmost stop position across all loaded records records.add(r); @@ -186,7 +198,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // 'records' and current position are fully updated. Last, we need to set the location of the whole track // (collection of ROD records) to the genomic site we are currently looking at, and return the list - return new RODRecordListImpl(name,records, GenomeLocParser.createGenomeLoc(curr_contig,curr_position)); + return new RODRecordListImpl(name,records, parser.createGenomeLoc(curr_contig,curr_position)); } /** @@ -218,13 +230,13 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { * @return Current ending position of the iterator, or null if no position exists. */ public GenomeLoc position() { - if ( curr_contig < 0 ) return null; + if ( curr_contig == null ) return null; if ( curr_query_end > curr_position ) { // do not attempt to reuse this iterator if the position we need it for lies before the end of last query performed - return GenomeLocParser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end); + return parser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end); } else { - return GenomeLocParser.createGenomeLoc(curr_contig,curr_position); + return parser.createGenomeLoc(curr_contig,curr_position); } } @@ -256,10 +268,11 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { */ public RODRecordList seekForward(GenomeLoc interval) { - if ( interval.getContigIndex() < curr_contig ) + if ( interval.isBefore(parser.createOverEntireContig(curr_contig)) && + !(interval.getStart() == 0 && interval.getStop() == 0 && interval.getContig().equals(curr_contig)) ) // This criteria is syntactic sugar for 'seek to right before curr_contig' throw new ReviewedStingException("Out of order query: query contig "+interval.getContig()+" is located before "+ "the iterator's current contig"); - if ( interval.getContigIndex() == curr_contig ) { + if ( interval.getContig().equals(curr_contig) ) { if ( interval.getStart() < curr_position ) throw new ReviewedStingException("Out of order query: query position "+interval +" is located before "+ "the iterator's current position "+curr_contig + ":" + curr_position); @@ -273,7 +286,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { next_is_allowed = ( curr_position == curr_query_end ); // we can call next() later only if interval length is 1 - if ( interval.getContigIndex() == curr_contig && curr_position <= max_position ) { + if ( interval.getContig().equals(curr_contig) && curr_position <= max_position ) { // some of the intervals we are currently keeping do overlap with the query interval purgeOutOfScopeRecords(); @@ -281,7 +294,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // clean up and get ready for fast-forwarding towards the requested position records.clear(); max_position = -1; - curr_contig = interval.getContigIndex(); + curr_contig = interval.getContig(); } // curr_contig and curr_position are set to where we asked to scroll to @@ -289,10 +302,12 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { while ( it.hasNext() ) { GATKFeature r = it.next(); if ( r == null ) continue; - int that_contig = r.getLocation().getContigIndex(); - if ( curr_contig > that_contig ) continue; // did not reach requested contig yet - if ( curr_contig < that_contig ) { + GenomeLoc currentContig = parser.createOverEntireContig(curr_contig); + GenomeLoc thatContig = r.getLocation(); + + if ( currentContig.isPast(thatContig) ) continue; // did not reach requested contig yet + if ( currentContig.isBefore(thatContig) ) { it.pushback(r); // next record is on the higher contig, we do not need it yet... break; } @@ -340,4 +355,5 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { public void close() { if (this.it != null) ((CloseableIterator)this.it.getUnderlyingIterator()).close(); } + } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 3fc76c8a5..bbcce6677 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -181,7 +181,7 @@ public class VariantContextAdaptors { // add the call to the genotype list, and then use this list to create a VariantContext genotypes.add(call); alleles.add(refAllele); - VariantContext vc = VariantContextUtils.toVC(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes); + VariantContext vc = VariantContextUtils.toVC(name, ref.getGenomeLocParser().createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes); return vc; } else return null; // can't handle anything else diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java index a42e103d6..e87986a2e 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java @@ -33,15 +33,15 @@ import java.util.StringTokenizer; import org.apache.log4j.Logger; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; -public class AnnotatorInputTableCodec implements FeatureCodec { +public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec { private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); @@ -49,6 +49,19 @@ public class AnnotatorInputTableCodec implements FeatureCodec header; + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + /** * Parses the header. * @@ -80,9 +93,9 @@ public class AnnotatorInputTableCodec implements FeatureCodec { +public class BeagleCodec implements ReferenceDependentFeatureCodec { private String[] header; public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2}; private BeagleReaderType readerType; @@ -52,6 +52,19 @@ public class BeagleCodec implements FeatureCodec { private static final String delimiterRegex = "\\s+"; + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + public Feature decodeLoc(String line) { return decode(line); } @@ -147,17 +160,6 @@ public class BeagleCodec implements FeatureCodec { private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)"); - private static GenomeLoc parseMarkerName(String markerName) { - Matcher m = MARKER_PATTERN.matcher(markerName); - if ( m.matches() ) { - String contig = m.group(1); - long start = Long.valueOf(m.group(2)); - return GenomeLocParser.createGenomeLoc(contig, start, start); - } else { - throw new IllegalArgumentException("Malformatted marker string: " + markerName + " required format is chrN:position"); - } - } - @Override public Class getFeatureType() { return BeagleFeature.class; @@ -175,7 +177,7 @@ public class BeagleCodec implements FeatureCodec { BeagleFeature bglFeature = new BeagleFeature(); - final GenomeLoc loc = GenomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this + final GenomeLoc loc = genomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this //parse the location: common to all readers bglFeature.setChr(loc.getContig()); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java index 9f7c2709f..9e9024e65 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.refdata.features.refseq; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -13,7 +13,21 @@ import java.util.ArrayList; /** * the ref seq codec */ -public class RefSeqCodec implements FeatureCodec { +public class RefSeqCodec implements ReferenceDependentFeatureCodec { + + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + @Override + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } @Override public Feature decodeLoc(String line) { @@ -21,19 +35,19 @@ public class RefSeqCodec implements FeatureCodec { String fields[] = line.split("\t"); if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length); String contig_name = fields[2]; - return new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])); + return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); } /** Fills this object from a text line in RefSeq (UCSC) text dump file */ @Override - public Feature decode(String line) { + public RefSeqFeature decode(String line) { if (line.startsWith("#")) return null; String fields[] = line.split("\t"); // we reference postion 15 in the split array below, make sure we have at least that many columns if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length); String contig_name = fields[2]; - RefSeqFeature feature = new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])); + RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); feature.setTranscript_id(fields[1]); if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1); @@ -41,8 +55,8 @@ public class RefSeqCodec implements FeatureCodec { else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line); - feature.setTranscript_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); - feature.setTranscript_coding_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]))); + feature.setTranscript_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); + feature.setTranscript_coding_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]))); feature.setGene_name(fields[12]); String[] exon_starts = fields[9].split(","); String[] exon_stops = fields[10].split(","); @@ -57,7 +71,7 @@ public class RefSeqCodec implements FeatureCodec { ArrayList exon_frames = new ArrayList(eframes.length); for ( int i = 0 ; i < exon_starts.length ; i++ ) { - exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); + exons.add(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); exon_frames.add(Integer.decode(eframes[i])); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java index af895b262..19fe064ce 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java @@ -25,15 +25,8 @@ public class RefSeqFeature implements Transcript, Feature { private List exon_frames; private String name; - // store the contig, start, and stop for this record - private final String contig; - private final int start; - private final int stop; - - public RefSeqFeature(String contig, int start, int stop) { - this.contig = contig; - this.start = start; - this.stop = stop; + public RefSeqFeature(GenomeLoc genomeLoc) { + this.transcript_interval = genomeLoc; } /** Returns id of the transcript (RefSeq NM_* id) */ @@ -44,8 +37,6 @@ public class RefSeqFeature implements Transcript, Feature { /** Returns transcript's full genomic interval (includes all exons with UTRs) */ public GenomeLoc getLocation() { - if (transcript_interval == null) - transcript_interval = GenomeLocParser.parseGenomeLoc(contig,start,stop); return transcript_interval; } @@ -270,16 +261,16 @@ public class RefSeqFeature implements Transcript, Feature { @Override public String getChr() { - return contig; + return transcript_interval.getContig(); } @Override public int getStart() { - return start; + return transcript_interval.getStart(); } @Override public int getEnd() { - return stop; + return transcript_interval.getStop(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java index c24eb94e6..a930e825f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -14,12 +15,27 @@ import java.util.*; /** * implementation of a simple table (tab or comma delimited format) input files... more improvements to come */ -public class TableCodec implements FeatureCodec { +public class TableCodec implements ReferenceDependentFeatureCodec { private String delimiterRegex = "\\s+"; private String headerDelimiter = "HEADER"; private String commentDelimiter = "#"; private ArrayList header = new ArrayList(); + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + @Override + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + + @Override public Feature decodeLoc(String line) { return decode(line); @@ -34,7 +50,7 @@ public class TableCodec implements FeatureCodec { throw new IllegalArgumentException("TableCodec line = " + line + " doesn't appear to be a valid table format"); - return new TableFeature(GenomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header); + return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header); } @Override diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index 7956e4469..ab11b9333 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -30,6 +30,7 @@ import org.broad.tribble.FeatureSource; import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; @@ -59,6 +60,11 @@ public class RMDTrack { // our sequence dictionary, which can be null private final SAMSequenceDictionary dictionary; + /** + * Parser to use when creating/parsing GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + // our codec type private final FeatureCodec codec; @@ -101,13 +107,14 @@ public class RMDTrack { * @param dict the sam sequence dictionary * @param codec the feature codec we use to decode this type */ - public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) { + public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) { this.type = type; this.recordType = codec.getFeatureType(); this.name = name; this.file = file; this.reader = reader; this.dictionary = dict; + this.genomeLocParser = genomeLocParser; this.codec = codec; } @@ -117,7 +124,7 @@ public class RMDTrack { */ public CloseableIterator getIterator() { try { - return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.iterator(),this.getName()); } catch (IOException e) { throw new UserException.CouldNotReadInputFile(getFile(), "Unable to read from file", e); } @@ -133,19 +140,19 @@ public class RMDTrack { } public CloseableIterator query(GenomeLoc interval) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); } public CloseableIterator query(GenomeLoc interval, boolean contained) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); } public CloseableIterator query(String contig, int start, int stop) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName()); } public CloseableIterator query(String contig, int start, int stop, boolean contained) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName()); } public void close() { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index 98ab07a84..b3bd6ef86 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -35,11 +35,13 @@ import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.source.CachingFeatureSource; import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.AbstractGenomeAnalysisEngine; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -80,18 +82,34 @@ public class RMDTrackBuilder extends PluginManager { // private sequence dictionary we use to set our tracks with private SAMSequenceDictionary dict = null; + /** + * Private genome loc parser to use when building out new locs. + */ + private GenomeLocParser genomeLocParser; + /** Create a new plugin manager. */ public RMDTrackBuilder() { super(FeatureCodec.class, "Codecs", "Codec"); } + /** + * Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined. + * @param dict + * @param genomeLocParser + */ + public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) { + super(FeatureCodec.class, "Codecs", "Codec"); + setSequenceDictionary(dict,genomeLocParser); + } + /** * * @param dict the sequence dictionary to use as a reference for Tribble track contig length lookups */ - public void setSequenceDictionary(SAMSequenceDictionary dict) { + public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) { this.dict = dict; - } + this.genomeLocParser = genomeLocParser; + } /** @return a list of all available track types we currently have access to create */ public Map getAvailableTrackNamesAndTypes() { @@ -115,6 +133,7 @@ public class RMDTrackBuilder extends PluginManager { /** * create a RMDTrack of the specified type * + * @param genomeLocParser GenomeLocParser to use, if case track needs additional reference context. * @param targetClass the target class of track * @param name what to call the track * @param inputFile the input file @@ -127,7 +146,7 @@ public class RMDTrackBuilder extends PluginManager { // return a feature reader track Pair pair = createFeatureReader(targetClass, name, inputFile); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name)); + return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(targetClass,name)); } /** @@ -186,6 +205,8 @@ public class RMDTrackBuilder extends PluginManager { FeatureCodec codex = this.createByType(targetClass); if ( codex instanceof NameAwareCodec ) ((NameAwareCodec)codex).setName(name); + if(codex instanceof ReferenceDependentFeatureCodec) + ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); return codex; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java index 649ffeb24..462bf98df 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata.utils; import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.Feature; import org.broad.tribble.iterators.CloseableTribbleIterator; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; @@ -39,10 +40,12 @@ import java.util.Iterator; * a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs) */ public class FeatureToGATKFeatureIterator implements CloseableIterator { + private final GenomeLocParser genomeLocParser; private final CloseableTribbleIterator iterator; private final String name; - public FeatureToGATKFeatureIterator(CloseableTribbleIterator iter, String name) { + public FeatureToGATKFeatureIterator(GenomeLocParser genomeLocParser,CloseableTribbleIterator iter, String name) { + this.genomeLocParser = genomeLocParser; this.name = name; this.iterator = iter; } @@ -54,7 +57,7 @@ public class FeatureToGATKFeatureIterator implements CloseableIterator { + private GenomeLocParser genomeLocParser; + private PushbackIterator it = null; public enum FORMAT { BED, GATK }; FORMAT myFormat = FORMAT.GATK; - public StringToGenomeLocIteratorAdapter(Iterator it, FORMAT format) { + public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator it, FORMAT format) { + this.genomeLocParser = genomeLocParser; this.it = new PushbackIterator(it); myFormat = format; } - public StringToGenomeLocIteratorAdapter(Iterator it ) { - this(it,FORMAT.GATK); + public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator it ) { + this(genomeLocParser,it,FORMAT.GATK); } public boolean hasNext() { @@ -81,8 +84,8 @@ public class StringToGenomeLocIteratorAdapter implements Iterator { public GenomeLoc next() { - if ( myFormat == FORMAT.GATK ) return GenomeLocParser.parseGenomeInterval( it.next() ); - return BedParser.parseLocation( it.next() ); + if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeInterval( it.next() ); + return BedParser.parseLocation( genomeLocParser,it.next() ); } public void remove() { diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index d28911d11..41bfcc872 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -48,7 +48,7 @@ public abstract class TraversalEngine,Provide /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); - private GenomeAnalysisEngine engine; + protected GenomeAnalysisEngine engine; /** * Gets the named traversal type associated with the given traversal. @@ -74,7 +74,7 @@ public abstract class TraversalEngine,Provide public void printProgress(Shard shard,GenomeLoc loc) { // A bypass is inserted here for unit testing. // TODO: print metrics outside of the traversal engine to more easily handle cumulative stats. - ReadMetrics cumulativeMetrics = engine != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics(); + ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics(); cumulativeMetrics.incrementMetrics(shard.getReadMetrics()); printProgress(loc, cumulativeMetrics, false); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 2740eae46..89ff688a7 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -59,12 +59,12 @@ public class TraverseDuplicates extends TraversalEngine readsAtLoc(final SAMRecord read, PushbackIterator iter) { - GenomeLoc site = GenomeLocParser.createGenomeLoc(read); + GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); ArrayList l = new ArrayList(); l.add(read); for (SAMRecord read2 : iter) { - GenomeLoc site2 = GenomeLocParser.createGenomeLoc(read2); + GenomeLoc site2 = engine.getGenomeLocParser().createGenomeLoc(read2); // the next read starts too late if (site2.getStart() != site.getStart()) { @@ -114,7 +114,7 @@ public class TraverseDuplicates extends TraversalEngine findDuplicateReads(SAMRecord read, Set> readSets ) { if ( read.getReadPairedFlag() ) { // paired - final GenomeLoc readMateLoc = GenomeLocParser.createGenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); + final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); for (List reads : readSets) { SAMRecord key = reads.get(0); @@ -123,7 +123,7 @@ public class TraverseDuplicates extends TraversalEngine Adding read to dups list: %s %d %s vs. %s", read, reads.size(), readMateLoc, keyMateLoc)); @@ -176,7 +176,7 @@ public class TraverseDuplicates extends TraversalEngine> readSets = uniqueReadSets(readsAtLoc(read, iter)); if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size())); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 4dbf158d6..240176f2f 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -57,7 +57,7 @@ public class TraverseLoci extends TraversalEngine,Locu // if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions // associated with the current site), we need to update the location. The updated location still starts // at the current genomic position, but it has to span the length of the longest deletion (if any). - location = GenomeLocParser.setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength()); + location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength()); // it is possible that the new expanded location spans the current shard boundary; the next method ensures // that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 4cb5b4949..670676b48 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -102,7 +102,7 @@ public class TraverseReads extends TraversalEngine,Read sum = walker.reduce(x, sum); } - GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()); + GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart()); printProgress(dataProvider.getShard(),locus); } return sum; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index d2941c637..ab4bf068b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -225,7 +225,7 @@ public class VariantAnnotator extends RodWalker { vcfWriter.add(annotatedVC, ref.getBase()); } else { // check to see if the buffered context is different (in location) this context - if ( indelBufferContext != null && ! VariantContextUtils.getLocation(indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(annotatedVCs.iterator().next())) ) { + if ( indelBufferContext != null && ! VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) { for ( VariantContext annotatedVC : indelBufferContext ) vcfWriter.add(annotatedVC, ref.getBase()); indelBufferContext = annotatedVCs; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 7186e8036..e00e7c01b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -165,10 +165,10 @@ public class ProduceBeagleInputWalker extends RodWalker { } public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) { - GenomeLoc currentLoc = VariantContextUtils.getLocation(preferredVC); + GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC); beagleWriter.print(String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart())); if ( beagleGenotypesWriter != null ) { - beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString())); + beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC).toString())); } for ( Allele allele : preferredVC.getAlleles() ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java index 21a0f4241..2cf640317 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -100,10 +100,12 @@ public class CallableLociWalker extends LocusWalker, Gen } // otherwise, merge them else { - sum = GenomeLocParser.setStop(sum, value.first.getStop()); + sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop()); fasta.append(value.second); } return sum; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index 5d43a67ac..a5a74ffa4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -2,13 +2,16 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; public class ClusteredSnps { + private GenomeLocParser genomeLocParser; private int window = 10; private int snpThreshold = 3; - public ClusteredSnps(int snpThreshold, int window) { + public ClusteredSnps(GenomeLocParser genomeLocParser,int snpThreshold, int window) { + this.genomeLocParser = genomeLocParser; this.window = window; this.snpThreshold = snpThreshold; if ( window < 1 || snpThreshold < 1 ) @@ -29,7 +32,7 @@ public class ClusteredSnps { throw new UserException.BadInput("The clustered SNPs filter does not work in the presence of non-variant records; see the documentation for more details"); // find the nth variant - GenomeLoc left = VariantContextUtils.getLocation(variants[i].getVariantContext()); + GenomeLoc left = VariantContextUtils.getLocation(genomeLocParser,variants[i].getVariantContext()); GenomeLoc right = null; int snpsSeen = 1; @@ -37,7 +40,7 @@ public class ClusteredSnps { while ( ++currentIndex < variants.length ) { if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) { if ( ++snpsSeen == snpThreshold ) { - right = VariantContextUtils.getLocation(variants[currentIndex].getVariantContext()); + right = VariantContextUtils.getLocation(genomeLocParser,variants[currentIndex].getVariantContext()); break; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 31a617f68..3ddb0d2da 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -117,7 +117,7 @@ public class VariantFiltrationWalker extends RodWalker { public void initialize() { if ( clusterWindow > 0 ) - clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow); + clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow); filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS); genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS); @@ -188,7 +188,7 @@ public class VariantFiltrationWalker extends RodWalker { Set filters = new LinkedHashSet(g.getFilters()); for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) { - if ( VariantContextUtils.match(vc, g, exp) ) + if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, g, exp) ) filters.add(exp.name); } genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased())); @@ -211,7 +211,7 @@ public class VariantFiltrationWalker extends RodWalker { filters.add(CLUSTERED_SNP_FILTER_NAME); for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) { - if ( VariantContextUtils.match(vc, exp) ) + if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, exp) ) filters.add(exp.name); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java index c98f0639a..e9fb6e875 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java @@ -133,7 +133,7 @@ public class BatchedCallsMerger extends LocusWalker imp } // merge the variant contexts - return VariantContextUtils.simpleMerge(calls, ref.getBase()); + return VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), calls, ref.getBase()); } public static AlignmentContext filterForSamples(ReadBackedPileup pileup, Set samples) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java index 25ba13061..dd83fe7bc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.contexts.*; import java.util.*; public class SimpleIndelCalculationModel extends GenotypeCalculationModel { + private final GenomeLocParser genomeLocParser; private int MIN_COVERAGE = 6; private double MIN_FRACTION = 0.3; @@ -20,7 +21,9 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel { // the previous normal event context // private Map cachedContext; - protected SimpleIndelCalculationModel() {} + protected SimpleIndelCalculationModel(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } private int totalIndels = 0; private int totalCoverage = 0; @@ -70,7 +73,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel { if ( bestEvent.charAt(0) == '-' ) { alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) ); alleles.add( Allele.create(bestEvent.substring(1), true )); - loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1); + loc = genomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1); } else throw new ReviewedStingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index a253cb792..bed012c44 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -239,7 +239,9 @@ public class IndelGenotyperV2Walker extends ReadWalker { FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; try { - refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq")); + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); } catch (IOException e) { throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e); } @@ -257,7 +259,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { int nNorm = 0; int nTum = 0; - for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) { + for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) { List tags = rid.getTags() ; if ( tags.isEmpty() && call_somatic ) throw new UserException.BadInput("In somatic mode all input bam files must be tagged as either 'normal' or 'tumor'. Untagged file: "+ @@ -297,12 +299,12 @@ public class IndelGenotyperV2Walker extends ReadWalker { if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) { // prepare to read intervals one-by-one, as needed (assuming they are sorted). - genotypeIntervals = new IntervalFileMergingIterator( + genotypeIntervals = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(), new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); } else { // read in the whole list of intervals for cleaning - GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals( - IntervalUtils.parseIntervalArguments(Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY); + GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(), + IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY); genotypeIntervals = locs.iterator(); } currentGenotypeInterval = genotypeIntervals.hasNext() ? genotypeIntervals.next() : null; @@ -310,7 +312,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { } - location = GenomeLocParser.createGenomeLoc(0,1); + location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1); // List> readGroupSets = getToolkit().getMergedReadGroupsByReaders(); // List> sampleSets = getToolkit().getSamplesByReaders(); @@ -387,8 +389,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { currentPosition = read.getAlignmentStart(); refName = new String(read.getReferenceName()); - location = GenomeLocParser.setContig(location,refName); - contigLength = GenomeLocParser.getContigInfo(refName).getSequenceLength(); + location = getToolkit().getGenomeLocParser().createGenomeLoc(refName,location.getStart(),location.getStop()); + contigLength = getToolkit().getGenomeLocParser().getContigInfo(refName).getSequenceLength(); outOfContigUserWarned = false; normal_context.clear(); // reset coverage window; this will also set reference position to 0 @@ -543,7 +545,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { } long move_to = adjustedPosition; - for ( long pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) { + for ( int pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) { if ( normal_context.indelsAt(pos).size() == 0 ) continue; // no indels @@ -579,8 +581,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right: if ( right > normal_context.getStop() ) right = normal_context.getStop(); - location = GenomeLocParser.setStart(location,pos); - location = GenomeLocParser.setStop(location,pos); // retrieve annotation data + location = getToolkit().getGenomeLocParser().setStart(location,pos); + location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data if ( normalCall.isCall() ) { normalCallsMade++; @@ -692,7 +694,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { if ( DEBUG ) System.out.println("DEBUG>> Emitting in somatic mode up to "+position+" force shift="+force+" current window="+tumor_context.getStart()+"-"+tumor_context.getStop()); - for ( long pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) { + for ( int pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) { if ( tumor_context.indelsAt(pos).size() == 0 ) continue; // no indels in tumor @@ -735,8 +737,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right - location = GenomeLocParser.setStart(location,pos); - location = GenomeLocParser.setStop(location,pos); // retrieve annotation data + location = getToolkit().getGenomeLocParser().setStart(location,pos); + location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data if ( tumorCall.isCall() ) { tumorCallsMade++; @@ -1395,13 +1397,13 @@ public class IndelGenotyperV2Walker extends ReadWalker { class WindowContext implements IndelListener { private Set reads; - private long start=0; // where the window starts on the ref, 1-based + private int start=0; // where the window starts on the ref, 1-based private CircularArray< List< IndelVariant > > indels; private List emptyIndelList = new ArrayList(); - public WindowContext(long start, int length) { + public WindowContext(int start, int length) { this.start = start; indels = new CircularArray< List >(length); // reads = new LinkedList(); @@ -1412,13 +1414,13 @@ public class IndelGenotyperV2Walker extends ReadWalker { * * @return */ - public long getStart() { return start; } + public int getStart() { return start; } /** Returns 1-based reference stop position (inclusive) of the interval this object keeps context for. * * @return */ - public long getStop() { return start + indels.length() - 1; } + public int getStop() { return start + indels.length() - 1; } /** Resets reference start position to 0 and clears the context. * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 72136dd3a..3b062c2e1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -211,9 +211,9 @@ public class IndelRealigner extends ReadWalker { for (String fileOrInterval : intervalsFile.split(";")) { // if it's a file, add items to raw interval list if (IntervalUtils.isIntervalFile(fileOrInterval)) { - merger.add(new IntervalFileMergingIterator( new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) ); + merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) ); } else { - rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval)); + rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeInterval(fileOrInterval)); } } if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator()); @@ -221,7 +221,7 @@ public class IndelRealigner extends ReadWalker { intervals = merger; } else { // read in the whole list of intervals for cleaning - GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY); + GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY); intervals = locs.iterator(); } currentInterval = intervals.hasNext() ? intervals.next() : null; @@ -239,9 +239,9 @@ public class IndelRealigner extends ReadWalker { nwayWriters = new HashMap(); - for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) { + for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) { - String fName = getToolkit().getDataSource().getSAMFile(rid).getName(); + String fName = getToolkit().getReadsDataSource().getSAMFile(rid).getName(); int pos ; if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM"); @@ -383,10 +383,10 @@ public class IndelRealigner extends ReadWalker { return 0; } - GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); + GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(read); // hack to get around unmapped reads having screwy locations if ( readLoc.getStop() == 0 ) - readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart()); + readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart()); if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) ) { // TODO -- it would be nice if we could use indels from 454 reads as alternate consenses @@ -1414,7 +1414,7 @@ public class IndelRealigner extends ReadWalker { } } - private static class ReadBin { + private class ReadBin { private final ArrayList reads = new ArrayList(); private byte[] reference = null; @@ -1426,11 +1426,11 @@ public class IndelRealigner extends ReadWalker { // This can happen if e.g. there's a large known indel with no overlapping reads. public void add(SAMRecord read) { - GenomeLoc locForRead = GenomeLocParser.createGenomeLoc(read); + GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read); if ( loc == null ) loc = locForRead; else if ( locForRead.getStop() > loc.getStop() ) - loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), locForRead.getStop()); + loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), loc.getStart(), locForRead.getStop()); reads.add(read); } @@ -1441,9 +1441,9 @@ public class IndelRealigner extends ReadWalker { // set up the reference if we haven't done so yet if ( reference == null ) { // first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read) - long padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1); - long padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); - loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), padLeft, padRight); + int padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1); + int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); + loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight); reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); StringUtil.toUpperCase(reference); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 6a39f36b6..8dc69eeaa 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -92,7 +92,7 @@ public class RealignerTargetCreator extends RodWalker pointEvents = new ArrayList(); + private ArrayList pointEvents = new ArrayList(); - public Event(GenomeLoc loc, long furthestStopPos, EVENT_TYPE type) { + public Event(GenomeLoc loc, int furthestStopPos, EVENT_TYPE type) { this.loc = loc; this.furthestStopPos = furthestStopPos; this.type = type; @@ -254,9 +254,9 @@ public class RealignerTargetCreator extends RodWalker 0 ) { - long lastPosition = pointEvents.get(pointEvents.size()-1); + int lastPosition = pointEvents.get(pointEvents.size()-1); if ( newPosition - lastPosition < windowSize ) { eventStopPos = Math.max(eventStopPos, newPosition); furthestStopPos = e.furthestStopPos; @@ -272,7 +272,7 @@ public class RealignerTargetCreator extends RodWalker= 0 && eventStopPos - eventStartPos < maxIntervalSize; + return getToolkit().getGenomeLocParser().validGenomeLoc(loc.getContig(), eventStartPos, eventStopPos) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize; } public String toString() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java index 8455bfda1..b1313f5f4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java @@ -33,6 +33,7 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFWriter; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -44,6 +45,8 @@ import java.util.*; public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWriter { private VCFWriter innerWriter; + private GenomeLocParser genomeLocParser; + private ReferenceSequenceFile referenceFileForMNPmerging; private int maxGenomicDistanceForMNP; @@ -64,8 +67,9 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite // Should we call innerWriter.close() in close() private boolean takeOwnershipOfInner; - public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) { + public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) { this.innerWriter = innerWriter; + this.genomeLocParser = genomeLocParser; this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile); this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP; this.useSingleSample = singleSample; @@ -83,8 +87,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite this.takeOwnershipOfInner = takeOwnershipOfInner; } - public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) { - this(innerWriter, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics + public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) { + this(innerWriter, genomeLocParser, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics } public void writeHeader(VCFHeader header) { @@ -113,7 +117,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite return; } - logger.debug("Next VC input = " + VariantContextUtils.getLocation(vc)); + logger.debug("Next VC input = " + VariantContextUtils.getLocation(genomeLocParser,vc)); boolean curVcIsNotFiltered = vc.isNotFiltered(); if (vcfrWaitingToMerge == null) { @@ -123,20 +127,20 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!"); if (curVcIsNotFiltered) { // still need to wait before can release vc - logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(vc)); + logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser,vc)); vcfrWaitingToMerge = new VCFRecord(vc, refBase, false); } else if (!emitOnlyMergedRecords) { // filtered records are never merged - logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(vc)); + logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser,vc)); innerWriter.add(vc, refBase); } } else { // waiting to merge vcfrWaitingToMerge - logger.debug("Waiting to merge " + VariantContextUtils.getLocation(vcfrWaitingToMerge.vc)); + logger.debug("Waiting to merge " + VariantContextUtils.getLocation(genomeLocParser,vcfrWaitingToMerge.vc)); if (!curVcIsNotFiltered) { if (!emitOnlyMergedRecords) { // filtered records are never merged - logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(vc)); + logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser,vc)); filteredVcfrList.add(new VCFRecord(vc, refBase, false)); } } @@ -164,7 +168,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite boolean mergedRecords = false; if (mergeDistanceInRange) { numRecordsWithinDistance++; - VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging); + VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser,vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging); if (mergedVc != null) { mergedRecords = true; vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true); @@ -209,8 +213,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite return numMergedRecords; } - public static int minDistance(VariantContext vc1, VariantContext vc2) { - return VariantContextUtils.getLocation(vc1).minDistance(VariantContextUtils.getLocation(vc2)); + public int minDistance(VariantContext vc1, VariantContext vc2) { + return VariantContextUtils.getLocation(genomeLocParser,vc1).minDistance(VariantContextUtils.getLocation(genomeLocParser,vc2)); } /** @@ -354,10 +358,10 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) { aas.segregationUnknown++; - logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2)); + logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2)); } else if (gt1.isHomRef() || gt2.isHomRef()) { - logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2)); + logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2)); aas.eitherNotVariant++; } else { // BOTH gt1 and gt2 have at least one variant allele (so either hets, or homozygous variant): @@ -386,7 +390,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite // Check MNPs vs. CHets: if (containsRefAllele(site1Alleles) && containsRefAllele(site2Alleles)) { - logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2)); + logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2)); if (logger.isDebugEnabled() && !(gt1.isHet() && gt2.isHet())) throw new ReviewedStingException("Since !gt1.isHomRef() && !gt2.isHomRef(), yet both have ref alleles, they BOTH must be hets!"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index 5d941ec17..20fc4cf76 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -78,7 +78,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker don't take control of writer, since didn't create it: - vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics); + vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics); writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter] // setup the header fields: diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 0bf2d3cb9..5d0aca9c9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -138,7 +138,7 @@ public class ReadBackedPhasingWalker extends RodWalker use ALL samples, false <-> emit all records, false <-> don't track the statistics of alternate alleles being merged: - writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false); + writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false); /* Due to discardIrrelevantPhasedSites(), the startDistance spanned by [partiallyPhasedSites.peek(), unphasedSiteQueue.peek()] is <= cacheWindow Due to processQueue(), the startDistance spanned by [unphasedSiteQueue.peek(), mostDownstreamLocusReached] is <= cacheWindow @@ -197,7 +197,7 @@ public class ReadBackedPhasingWalker extends RodWalker= 0; i--) { + SAMSequenceDictionary dictionary = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(); + SAMSequenceRecord contig = dictionary.getSequence(cur.getContig()); + if(cur.getStop() < contig.getSequenceLength()) + cur = getToolkit().getGenomeLocParser().incPos(cur,1); + else + cur = getToolkit().getGenomeLocParser().createGenomeLoc(dictionary.getSequence(contig.getSequenceIndex()+1).getSequenceName(),1,1); + out.printf("%s: skipped%n", cur); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java index 72ba37830..bc68be592 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java @@ -87,11 +87,11 @@ public class ValidatingPileupWalker extends LocusWalker { logger.info("Loading SNP mask... "); ReferenceOrderedData snp_mask; if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser()); CloseableIterator iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator(); - snpMaskIterator = new SeekableRODIterator(iter); + snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter); } else { // TODO: fix me when Plink is back @@ -142,8 +142,8 @@ public class PickSequenomProbes extends RodWalker { if ( ! haveMaskForWindow ) { String contig = context.getLocation().getContig(); - long offset = context.getLocation().getStart(); - long true_offset = offset - 200; + int offset = context.getLocation().getStart(); + int true_offset = offset - 200; // we have variant; let's load all the snps falling into the current window and prepare the mask array. // we need to do it only once per window, regardless of how many vcs we may have at this location! @@ -152,7 +152,7 @@ public class PickSequenomProbes extends RodWalker { for ( int i = 0 ; i < 401; i++ ) maskFlags[i] = 0; - RODRecordList snpList = snpMaskIterator.seekForward(GenomeLocParser.createGenomeLoc(contig,offset-200,offset+200)); + RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200)); if ( snpList != null && snpList.size() != 0 ) { Iterator snpsInWindow = snpList.iterator(); int i = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 5bc147725..3c1e1ce4d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -639,7 +639,7 @@ public class VariantEvalWalker extends RodWalker implements Tr else if ( group.requiresNovel() && vcKnown ) return false; - if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) ) + if ( group.selectExp != null && ! VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, group.selectExp) ) return false; // nothing invalidated our membership in this set diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index def39ba3c..c82c18c09 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -178,7 +178,7 @@ public class GenerateVariantClustersWalker extends RodWalker { vc = VariantContextUtils.reverseComplement(vc); } - vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length)); + vc = VariantContextUtils.modifyLocation(vc, getToolkit().getGenomeLocParser().createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length)); VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false); if ( originalVC.isSNP() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index e8e57986c..8addc10d8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker { if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { //System.out.printf("%s%n",sub.toString()); for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { - if ( !VariantContextUtils.match(sub, jexl) ) { + if ( !VariantContextUtils.match(getToolkit().getGenomeLocParser(),sub, jexl) ) { return 0; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java index 87b82c7c8..60078cbfb 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java @@ -56,7 +56,7 @@ public class CreateTiTvTrack extends RodWalker { window.update(VariantContextUtils.isTransition(vc)); if ( window.getTiTv() != null ) { - writer.writeData(VariantContextUtils.getLocation(vc),window.getTiTv()); + writer.writeData(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc),window.getTiTv()); } return window; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java index dd9fab657..34b08a089 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java @@ -287,7 +287,7 @@ public class DSBWalkerV3 extends ReadWalker { } - private void shiftWindows(long pos) { + private void shiftWindows(int pos) { // we shift windows when there is a read that does not fit into the current window. // the position, to which the shift is performed, is the first position such that the new read // can be accomodated. Hence we can safely slide up to pos, only discarding reads that go out of scope - @@ -332,7 +332,7 @@ public class DSBWalkerV3 extends ReadWalker { purgeSignal(pos); purgeControl(pos); - currentWindow = GenomeLocParser.createGenomeLoc(currentWindow.getContigIndex(),pos,pos+WINDOW_SIZE-1); + currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(currentWindow.getContig(),pos,pos+WINDOW_SIZE-1); } @Override @@ -349,7 +349,8 @@ public class DSBWalkerV3 extends ReadWalker { controlReadGroups = readGroupSets.get(1); // System.out.println(controlReadGroups.size()+" read groups in control"); - currentWindow = GenomeLocParser.createGenomeLoc(0,1,WINDOW_SIZE); + String sequenceName = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequence(0).getSequenceName(); + currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(sequenceName,1,WINDOW_SIZE); readsInSignalWindow = new LinkedList(); readsInControlWindow = new LinkedList(); @@ -366,7 +367,7 @@ public class DSBWalkerV3 extends ReadWalker { if ( read.getReferenceIndex() > currentWindow.getContigIndex() ) { printRegion(); // print all we had on the previous contig - currentWindow = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(), + currentWindow = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(), read.getAlignmentStart(), read.getAlignmentStart()+WINDOW_SIZE-1); currentContig = read.getReferenceName(); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java index a2ff62a72..ea904d26e 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java @@ -95,7 +95,7 @@ public class DesignFileGeneratorWalker extends RodWalker { } for ( Map.Entry additionalGenes : currentBedFeatures.entrySet() ) { - GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd()); + GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd()); if ( interval.overlapsP(entryLoc) && ! additionalGenes.getValue().getName().equals("") && ! intervalBuffer.get(interval).geneNames.contains(additionalGenes.getKey()+"_"+additionalGenes.getValue().getName())) { @@ -142,7 +142,7 @@ public class DesignFileGeneratorWalker extends RodWalker { } for ( Map.Entry entry : currentBedFeatures.entrySet() ) { - GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd()); + GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd()); if ( entryLoc.isBefore(ref.getLocus()) ) { currentBedFeatures.remove(entry.getKey()); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index 4e682956e..c07f2f168 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -39,7 +39,9 @@ public class IndelAnnotator extends RodWalker { FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; try { - refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq")); + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); } catch (IOException e) { throw new UserException.CouldNotReadInputFile(RefseqFileName, e); } @@ -128,14 +130,14 @@ public class IndelAnnotator extends RodWalker { } else { if ( RefSeqFeature.isCoding(ann) ) { //b.append(annIntron); // not in exon, but within the coding region = intron - GenomeLoc ig = GenomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd()); + GenomeLoc ig = getToolkit().getGenomeLocParser().createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd()); GenomeLoc cl = t.getCodingLocation(); GenomeLoc g = t.getLocation(); boolean spliceSiteDisruption = false; for (GenomeLoc exon : t.getExons()) { - GenomeLoc expandedExon = GenomeLocParser.createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6); + GenomeLoc expandedExon = getToolkit().getGenomeLocParser().createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6); if (ig.overlapsP(expandedExon)) { spliceSiteDisruption = true; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java index 5bc71c8cf..b90a152dc 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java @@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -71,7 +72,7 @@ public class IndelDBRateWalker extends RodWalker private void finalUpdate(OverlapTabulator tab) { while ( ! evalContexts.isEmpty() ) { - tab.update(emptyOverlapTable()); + tab.update(emptyOverlapTable(getToolkit().getGenomeLocParser())); } } @@ -119,25 +120,25 @@ public class IndelDBRateWalker extends RodWalker public OverlapTable getOverlapTable(ReferenceContext ref) { // step 1: check that the eval queue is non-empty and that we are outside the window - if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) { + if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) { return null; } // step 2: discard all comp variations which come before the window - while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(compContexts.get(0)).isBefore(ref.getLocus()) && - VariantContextUtils.getLocation(compContexts.get(0)).distance(ref.getLocus()) > indelWindow) { + while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).isBefore(ref.getLocus()) && + VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).distance(ref.getLocus()) > indelWindow) { compContexts.remove(0); } // step 3: see if there are any contexts left; if so then they must be within the window if ( ! compContexts.isEmpty() ) { return nonEmptyOverlapTable(ref); } else { - return emptyOverlapTable(); + return emptyOverlapTable(ref.getGenomeLocParser()); } } - public OverlapTable emptyOverlapTable() { + public OverlapTable emptyOverlapTable(GenomeLocParser genomeLocParser) { // only eval, no comp - OverlapTable ot = new OverlapTable(); + OverlapTable ot = new OverlapTable(genomeLocParser); ot.setEvalSizeAndType(evalContexts.get(0)); return ot; } @@ -145,17 +146,17 @@ public class IndelDBRateWalker extends RodWalker public OverlapTable nonEmptyOverlapTable(ReferenceContext ref) { if ( vcfWriter != null ) { int i = 0; - while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).isBefore(VariantContextUtils.getLocation(evalContexts.get(0)))) { + while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).isBefore(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)))) { vcfWriter.add(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]); i++; } vcfWriter.add(evalContexts.get(0), ref.getBase()); - while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).distance(VariantContextUtils.getLocation(evalContexts.get(0))) <= indelWindow) { + while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).distance(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0))) <= indelWindow) { vcfWriter.add(compContexts.get(i), compContexts.get(i).getReference().getBases()[0]); i++; } } - OverlapTable ot = new OverlapTable(); + OverlapTable ot = new OverlapTable(ref.getGenomeLocParser()); ot.setCompOverlaps(compContexts.size()); ot.setDistances(compContexts,evalContexts.get(0), indelWindow); return ot; @@ -164,13 +165,15 @@ public class IndelDBRateWalker extends RodWalker } class OverlapTable { + private GenomeLocParser genomeLocParser; private int numOverlaps; private ExpandingArrayList distances; // currently unused private int evalSize; private boolean isDeletion; - public OverlapTable() { + public OverlapTable(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; numOverlaps = 0; } @@ -187,8 +190,8 @@ class OverlapTable { public void setDistances(List comps, VariantContext eval, int winsize) { distances = new ExpandingArrayList(); for ( VariantContext comp : comps ) { - if ( VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval)) <= winsize ) { - distances.add(VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval))); + if ( VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval)) <= winsize ) { + distances.add(VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval))); } } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java index a3296c9ce..302c071d5 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java @@ -143,7 +143,7 @@ public class IndelErrorRateWalker extends LocusWalker { // System.out.println("Non countable indel event at "+pileup.getLocation()); countableIndelBuffer.clear(); coverageBuffer.clear(); // we do not want to count observations (read bases) around non-countable indel as well - skipToLoc = GenomeLocParser.createGenomeLoc(pileup.getLocation().getContigIndex(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1); + skipToLoc = ref.getGenomeLocParser().createGenomeLoc(pileup.getLocation().getContig(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1); // System.out.println("Skip to "+skipToLoc); } else { // pileup does not contain too many indels, we need to store them in the buffer and count them later, diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java index 1def72a21..109fa6b40 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java @@ -83,7 +83,7 @@ public class MarkIntervals extends RodWalker { try { for ( String line : new XReadLines(locs, true) ) { String parts[] = line.split(":"); - badSites.add(GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1]))); + badSites.add(getToolkit().getGenomeLocParser().createGenomeLoc(parts[0], Integer.valueOf(parts[1]))); } } catch ( FileNotFoundException e ) { throw new UserException.CouldNotReadInputFile(locs, e); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index 89487ca16..66969da79 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -237,7 +237,7 @@ public class MendelianViolationClassifier extends LocusWalker { +public class + ReadQualityScoreWalker extends ReadWalker { @Output protected PrintStream out; @Argument(fullName = "inputQualityFile", shortName = "if", doc = "Input quality score file generated by NeighborhoodQualityWalker", required = true) @@ -98,7 +99,7 @@ public class ReadQualityScoreWalker extends ReadWalker // BUGBUG: This assumes reads will be sorted by start location float sumNeighborhoodQuality = 0.0f; int numLines = 0; - GenomeLoc readLoc = GenomeLocParser.createGenomeLoc( read ); + GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc( read ); if( readLoc.size() > 0 ) { // only calculate mean NQS if the read has a well formed GenomeLoc, if not NQS will be zero try { if( line == null ) { @@ -106,12 +107,12 @@ public class ReadQualityScoreWalker extends ReadWalker if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file is empty" ); } } String[] halves = line.split( " ", 2 ); - GenomeLoc curLoc = GenomeLocParser.parseGenomeLoc( halves[0] ); + GenomeLoc curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] ); while( curLoc.isBefore( readLoc ) ) { // Loop until the beginning of the read line = inputReader.readLine(); if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find beginning of read: " + readLoc ); } halves = line.split( " ", 2 ); - curLoc = GenomeLocParser.parseGenomeLoc( halves[0] ); + curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] ); } // now we have skipped ahead in the input file to where this read starts logger.debug( "Starting: " + curLoc + ", read: " + readLoc + "\t size: " + readLoc.size() ); @@ -124,7 +125,7 @@ public class ReadQualityScoreWalker extends ReadWalker line = inputReader.readLine(); if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find end of read: " + readLoc ); } halves = line.split( " ", 2 ); - curLoc = GenomeLocParser.parseGenomeLoc( halves[0] ); + curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] ); } // now we have parsed the input file up to where the read ends // reset back to the mark in order to parse the next read in the next call to the reduce function diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java index 03777e8ec..a81136d8c 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java @@ -60,7 +60,7 @@ public class RealignedReadCounter extends ReadWalker { public void initialize() { // prepare to read intervals one-by-one, as needed (assuming they are sorted). - intervals = new IntervalFileMergingIterator( new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); + intervals = new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); currentInterval = intervals.hasNext() ? intervals.next() : null; } @@ -69,10 +69,10 @@ public class RealignedReadCounter extends ReadWalker { return 0; } - GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); + GenomeLoc readLoc = ref.getGenomeLocParser().createGenomeLoc(read); // hack to get around unmapped reads having screwy locations if ( readLoc.getStop() == 0 ) - readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart()); + readLoc = ref.getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart()); if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) ) return 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java index 2a10f1ad5..40adde697 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java @@ -130,7 +130,7 @@ public class TestReadFishingWalker extends ReadWalker { else throw new ReviewedStingException("Invalid indel type: " + type); - aligners.put(GenomeLocParser.createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration())); + aligners.put(getToolkit().getGenomeLocParser().createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration())); if(++numAlignersCreated % 100 == 0) out.printf("Created %d aligners in %dms%n",++numAlignersCreated,System.currentTimeMillis()-startTime); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java index 45768476d..5c06c188f 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java @@ -28,9 +28,9 @@ public class ValidateRODForReads extends ReadWalker { @Override public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { if (tracker != null) { - Map> mapping = tracker.getContigOffsetMapping(); - for (Map.Entry> entry : mapping.entrySet()) { - GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey()); + Map> mapping = tracker.getContigOffsetMapping(); + for (Map.Entry> entry : mapping.entrySet()) { + GenomeLoc location = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),entry.getKey()); if (!map.containsKey(location)) { map.put(location,0); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java index 09e48833e..6cbe725c7 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java @@ -113,7 +113,7 @@ public class CombineDuplicatesWalker extends DuplicateWalker, SA // out.printf("Combining Read %s%n", read.format()); // } // - combinedRead = DupUtils.combineDuplicates(reads, MAX_QUALITY_SCORE); + combinedRead = DupUtils.combineDuplicates(getToolkit().getGenomeLocParser(),reads, MAX_QUALITY_SCORE); //out.printf(" => into %s%n", combinedRead.format()); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 2c6bf42ce..e08d51e2a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -468,10 +468,10 @@ public class UnifiedGenotyperEngine { // if a read is too long for the reference context, extend the context (being sure not to extend past the end of the chromosome) if ( record.getAlignmentEnd() > refContext.getWindow().getStop() ) { - GenomeLoc window = GenomeLocParser.createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength())); + GenomeLoc window = refContext.getGenomeLocParser().createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength())); byte[] bases = referenceReader.getSubsequenceAt(window.getContig(), window.getStart(), window.getStop()).getBases(); StringUtil.toUpperCase(bases); - refContext = new ReferenceContext(refContext.getLocus(), window, bases); + refContext = new ReferenceContext(refContext.getGenomeLocParser(),refContext.getLocus(), window, bases); } BitSet mismatches = AlignmentUtils.mismatchesInRefWindow(record, refContext, UAC.MAX_MISMATCHES, MISMATCH_WINDOW_SIZE); diff --git a/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java b/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java index c08b459dc..d16c50fac 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java +++ b/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java @@ -136,10 +136,10 @@ public class RemapAlignments extends CommandLineProgram { } h.setSequenceDictionary(reference.getSequenceDictionary()); - GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary()); - + GenomeLocParser genomeLocParser = new GenomeLocParser(reference.getSequenceDictionary()); + map = new GenomicMap(10000); - map.read(MAP_FILE); + map.read(genomeLocParser,MAP_FILE); System.out.println("Map loaded successfully: "+map.size()+" contigs"); diff --git a/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java b/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java index c22105ed5..49c136874 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java +++ b/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java @@ -40,12 +40,7 @@ import java.util.List; import java.util.Map; import java.util.Set; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; +import net.sf.samtools.*; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.*; @@ -95,7 +90,7 @@ public class GenomicMap implements Iterable 0 && segments.get(segments.size()-1).getStop()+1 == newSegment.getStart() && segments.get(segments.size()-1).getContigIndex() == newSegment.getContigIndex()) @@ -408,7 +404,7 @@ public class GenomicMap implements Iterable, Cloneable, Serializable * start and stop position, and (optionally) the contig name */ protected final int contigIndex; - protected final long start; - protected final long stop; + protected final int start; + protected final int stop; protected final String contigName; // -------------------------------------------------------------------------------------------------------------- @@ -32,93 +32,25 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable // constructors // // -------------------------------------------------------------------------------------------------------------- - /*GenomeLoc( int contigIndex, final long start, final long stop ) { - MAX_CONTIG = Integer.MAX_VALUE; - if (start < 0) { throw new StingException("Bad start position " + start);} - if (stop < -1) { throw new StingException("Bad stop position " + stop); } // a negative -1 indicates it's not a meaningful end position - - this.contigIndex = contigIndex; - this.start = start; - this.contigName = null; // we just don't know - this.stop = stop == -1 ? start : stop; - }*/ protected GenomeLoc(final SAMRecord read) { this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()); } - protected GenomeLoc( final String contig, final int contigIndex, final long start, final long stop ) { + protected GenomeLoc( final String contig, final int contigIndex, final int start, final int stop ) { this.contigName = contig; this.contigIndex = contigIndex; this.start = start; this.stop = stop; } - /*GenomeLoc( final int contig, final long pos ) { - this(contig, pos, pos ); - } - */ - protected GenomeLoc( final GenomeLoc toCopy ) { - this( toCopy.getContig(), toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() ); - } - - /** - * Returns true if we have a specified series of locations to process AND we are past the last - * location in the list. It means that, in a serial processing of the genome, that we are done. - * - * @param curr Current genome Location - * @param locs a list of genomic locations - * @return true if we are past the last location to process + * Return a new GenomeLoc at this same position. + * @return A GenomeLoc with the same contents as the current loc. */ - public static boolean pastFinalLocation(GenomeLoc curr, List locs) { - return (locs.size() > 0 && curr.isPast(locs.get(locs.size() - 1))); - } - - /** - * A key function that returns true if the proposed GenomeLoc curr is within the list of - * locations we are processing in this TraversalEngine - * - * @param curr the current location - * @param locs a list of genomic locations - * @return true if we should process GenomeLoc curr, otherwise false - */ - public static boolean inLocations(GenomeLoc curr, ArrayList locs) { - if ( locs.size() == 0 ) { - return true; - } else { - for ( GenomeLoc loc : locs ) { - //System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr)); - if (loc.overlapsP(curr)) - return true; - } - return false; - } - } - - public static void removePastLocs(GenomeLoc curr, List locs) { - while ( !locs.isEmpty() && curr.isPast(locs.get(0)) ) { - //System.out.println("At: " + curr + ", removing: " + locs.get(0)); - locs.remove(0); - } - } - - public static boolean overlapswithSortedLocsP(GenomeLoc curr, List locs, boolean returnTrueIfEmpty) { - if ( locs.isEmpty() ) - return returnTrueIfEmpty; - - // skip loci before intervals begin - if ( curr.contigIndex < locs.get(0).contigIndex ) - return false; - - for ( GenomeLoc loc : locs ) { - //System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr)); - if ( loc.overlapsP(curr) ) - return true; - if ( curr.compareTo(loc) < 0 ) - return false; - } - return false; + @Override + public GenomeLoc clone() { + return new GenomeLoc(getContig(),getContigIndex(),getStart(),getStop()); } // @@ -129,8 +61,8 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable } public final int getContigIndex() { return this.contigIndex; } - public final long getStart() { return this.start; } - public final long getStop() { return this.stop; } + public final int getStart() { return this.start; } + public final int getStop() { return this.stop; } public final String toString() { if ( throughEndOfContigP() && atBeginningOfContigP() ) return getContig(); @@ -139,13 +71,8 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable else return String.format("%s:%d-%d", getContig(), getStart(), getStop()); } - - public final boolean isUnmapped() { return this.contigIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; } - public final boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; } - public final boolean atBeginningOfContigP() { return this.start == 1; } - - - public final boolean isSingleBP() { return stop == start; } + private boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; } + private boolean atBeginningOfContigP() { return this.start == 1; } public final boolean disjointP(GenomeLoc that) { return this.contigIndex != that.contigIndex || this.start > that.stop || that.start > this.stop; @@ -187,15 +114,6 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop(); } - /** - * Returns true if this GenomeLoc contains the start position of GenomeLoc that, on the same contig - * @param start - * @return - */ - public final boolean containsStartPosition(long start) { - return getStart() <= start && start <= getStop(); - } - public final boolean onSameContig(GenomeLoc that) { return (this.contigIndex == that.contigIndex); } @@ -215,26 +133,26 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable return this.compareTo(left) > -1 && this.compareTo(right) < 1; } + /** + * Tests whether this contig is completely before contig 'that'. + * @param that Contig to test against. + * @return true if this contig ends before 'that' starts; false if this is completely after or overlaps 'that'. + */ public final boolean isBefore( GenomeLoc that ) { int comparison = this.compareContigs(that); return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() )); } + /** + * Tests whether this contig is completely after contig 'that'. + * @param that Contig to test against. + * @return true if this contig starts after 'that' ends; false if this is completely before or overlaps 'that'. + */ public final boolean isPast( GenomeLoc that ) { int comparison = this.compareContigs(that); return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() )); } - public final boolean startsBefore( GenomeLoc that ) { - int comparison = this.compareContigs(that); - return ( comparison == -1 || ( comparison == 0 && this.getStart() < that.getStart() )); - } - - public final boolean startsAfter( GenomeLoc that ) { - int comparison = this.compareContigs(that); - return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStart() )); - } - // Return the minimum distance between any pair of bases in this and that GenomeLocs: public final int minDistance( final GenomeLoc that ) { if (!this.onSameContig(that)) @@ -281,15 +199,6 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable } - /** - * Return a new GenomeLoc at this same position. - * @return A GenomeLoc with the same contents as the current loc. - */ - @Override - public GenomeLoc clone() { - return new GenomeLoc(this); - } - /** * conpare this genomeLoc's contig to another genome loc * @param that the genome loc to compare contigs with diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 6336e113f..8ea108fe1 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -57,24 +57,33 @@ import org.broadinstitute.sting.utils.text.XReadLines; public class GenomeLocParser { private static Logger logger = Logger.getLogger(GenomeLocParser.class); - //private static final Pattern mPattern = Pattern.compile("([\\p{Print}&&[^:]]+):*([\\d,]+)?([\\+-])?([\\d,]+)?$"); // matches case 3 - - // -------------------------------------------------------------------------------------------------------------- // // Ugly global variable defining the optional ordering of contig elements // // -------------------------------------------------------------------------------------------------------------- //public static Map refContigOrdering = null; - protected static SAMSequenceDictionary contigInfo = null; + protected SAMSequenceDictionary contigInfo = null; /** - * do we have a contig ordering setup? - * - * @return true if the contig order is setup + * set our internal reference contig order + * @param refFile the reference file */ - public static boolean hasKnownContigOrdering() { - return contigInfo != null; + public GenomeLocParser(final ReferenceSequenceFile refFile) { + this(refFile.getSequenceDictionary()); + } + + public GenomeLocParser(SAMSequenceDictionary seqDict) { + if (seqDict == null) { // we couldn't load the reference dictionary + //logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs"); + throw new UserException.CommandLineException("Failed to load reference dictionary"); + } else if (contigInfo == null) { + contigInfo = seqDict; + logger.debug(String.format("Prepared reference sequence contig dictionary")); + for (SAMSequenceRecord contig : seqDict.getSequences()) { + logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength())); + } + } } /** @@ -84,7 +93,7 @@ public class GenomeLocParser { * * @return the sam sequence record */ - public static SAMSequenceRecord getContigInfo(final String contig) { + public SAMSequenceRecord getContigInfo(final String contig) { return contigInfo.getSequence(contig); } @@ -96,53 +105,13 @@ public class GenomeLocParser { * * @return the contig index, -1 if not found */ - public static int getContigIndex(final String contig, boolean exceptionOut) { + public int getContigIndex(final String contig, boolean exceptionOut) { if (contigInfo.getSequenceIndex(contig) == -1 && exceptionOut) throw new UserException.CommandLineException(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig)); return contigInfo.getSequenceIndex(contig); } - /** - * set our internal reference contig order - * - * @param refFile the reference file - * - * @return true if we were successful - */ - public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) { - return setupRefContigOrdering(refFile.getSequenceDictionary()); - } - - /** - * setup our internal reference contig order - * - * @param seqDict the sequence dictionary - * - * @return true if we were successful - */ - public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) { - if (seqDict == null) { // we couldn't load the reference dictionary - //logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs"); - throw new UserException.CommandLineException("Failed to load reference dictionary"); - } else if (contigInfo == null) { - contigInfo = seqDict; - logger.debug(String.format("Prepared reference sequence contig dictionary")); - for (SAMSequenceRecord contig : seqDict.getSequences()) { - logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength())); - } - } - return true; - } - - /** - * A package-protected method that can be used by the test system to reset the sequence dictionary - * being used. Use this method sparingly. - */ - static void clearRefContigOrdering() { - contigInfo = null; - } - /** * parse a genome interval, from a location string * @@ -155,7 +124,7 @@ public class GenomeLocParser { * */ - public static GenomeLoc parseGenomeInterval(final String str) { + public GenomeLoc parseGenomeInterval(final String str) { GenomeLoc ret = parseGenomeLoc(str); exceptionOnInvalidGenomeLocBounds(ret); return ret; @@ -173,13 +142,13 @@ public class GenomeLocParser { * @return a GenomeLoc representing the String * */ - public static GenomeLoc parseGenomeLoc(final String str) { + public GenomeLoc parseGenomeLoc(final String str) { // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' //System.out.printf("Parsing location '%s'%n", str); String contig = null; - long start = 1; - long stop = -1; + int start = 1; + int stop = -1; final int colonIndex = str.indexOf(":"); if(colonIndex == -1) { @@ -210,7 +179,7 @@ public class GenomeLocParser { if (!isContigValid(contig)) throw new UserException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?"); - if (stop == Integer.MAX_VALUE && hasKnownContigOrdering()) + if (stop == Integer.MAX_VALUE) // lookup the actually stop position! stop = getContigInfo(contig).getSequenceLength(); @@ -228,7 +197,7 @@ public class GenomeLocParser { * Parses a number like 1,000,000 into a long. * @param pos */ - private static long parsePosition(final String pos) { + private int parsePosition(final String pos) { //String x = pos.replaceAll(",", ""); - this was replaced because it uses regexps //System.out.println("Parsing position: '" + pos + "'"); if(pos.indexOf('-') != -1) { @@ -244,13 +213,13 @@ public class GenomeLocParser { continue; } else if(c < '0' || c > '9') { throw new NumberFormatException("Position: '" + pos + "' contains invalid chars." ); - } else { + } else { buffer.append(c); } } - return Long.parseLong(buffer.toString()); + return Integer.parseInt(buffer.toString()); } else { - return Long.parseLong(pos); + return Integer.parseInt(pos); } } @@ -263,7 +232,7 @@ public class GenomeLocParser { * * @return the list of merged locations */ - public static List mergeIntervalLocations(final List raw, IntervalMergingRule rule) { + public List mergeIntervalLocations(final List raw, IntervalMergingRule rule) { if (raw.size() <= 1) return raw; else { @@ -292,7 +261,7 @@ public class GenomeLocParser { * * @return True if the contig is valid. False otherwise. */ - private static boolean isContigValid(String contig) { + private boolean isContigValid(String contig) { int contigIndex = contigInfo.getSequenceIndex(contig); return contigIndex >= 0 && contigIndex < contigInfo.size(); } @@ -309,7 +278,7 @@ public class GenomeLocParser { * Validation: only checks that contig is valid * start/stop could be anything */ - public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) { + public GenomeLoc parseGenomeLoc(final String contig, int start, int stop) { if (!isContigValid(contig)) throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?"); return new GenomeLoc(contig, getContigIndex(contig,true), start, stop); @@ -327,7 +296,7 @@ public class GenomeLocParser { * @param allowEmptyIntervalList if false empty interval lists will return null * @return List List of Genome Locs that have been parsed from file */ - public static List intervalFileToList(final String file_name, boolean allowEmptyIntervalList) { + public List intervalFileToList(final String file_name, boolean allowEmptyIntervalList) { // try to open file File inputFile = new File(file_name); @@ -344,7 +313,7 @@ public class GenomeLocParser { // case: BED file if (file_name.toUpperCase().endsWith(".BED")) { - BedParser parser = new BedParser(inputFile); + BedParser parser = new BedParser(this,inputFile); return parser.getLocations(); } @@ -393,22 +362,8 @@ public class GenomeLocParser { * * @return the string that represents that contig name */ - private static String getSequenceNameFromIndex(int contigIndex) { - return GenomeLocParser.contigInfo.getSequence(contigIndex).getSequenceName(); - } - - /** - * create a genome loc, given the contig name, start, and stop - * - * @param contig the contig name - * @param start the starting position - * @param stop the stop position - * - * @return a new genome loc - */ - public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), start, stop)); + private String getSequenceNameFromIndex(int contigIndex) { + return contigInfo.getSequence(contigIndex).getSequenceName(); } /** @@ -420,31 +375,21 @@ public class GenomeLocParser { * * @return a new genome loc - but don't exception out if it is invalid */ - public static GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final long start, final long stop) { - checkSetup(); - return new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,false), start, stop); + public GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final int start, final int stop) { + return new GenomeLoc(contig, getContigIndex(contig,false), start, stop); } /** - * create a genome loc, given the contig index, start, and stop + * create a genome loc, given the contig name, start, and stop * - * @param contigIndex the contig index - * @param start the start position - * @param stop the stop position + * @param contig the contig name + * @param start the starting position + * @param stop the stop position * * @return a new genome loc */ - public static GenomeLoc createGenomeLoc(int contigIndex, final long start, final long stop) { - checkSetup(); - if (start < 0) { - throw new ReviewedStingException("Bad start position " + start); - } - if (stop < -1) { - throw new ReviewedStingException("Bad stop position " + stop); - } // a negative -1 indicates it's not a meaningful end position - - - return new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop); + public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) { + return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), start, stop)); } /** @@ -454,25 +399,10 @@ public class GenomeLocParser { * * @return */ - public static GenomeLoc createGenomeLoc(final SAMRecord read) { - checkSetup(); + public GenomeLoc createGenomeLoc(final SAMRecord read) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd())); } - - /** - * create a new genome loc, given the contig position, and a single position - * - * @param contig the contig name - * @param pos the postion - * - * @return a genome loc representing a single base at the specified postion on the contig - */ - public static GenomeLoc createGenomeLoc(final int contig, final long pos) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos)); - } - /** * create a new genome loc, given the contig name, and a single position * @@ -481,14 +411,8 @@ public class GenomeLocParser { * * @return a genome loc representing a single base at the specified postion on the contig */ - public static GenomeLoc createGenomeLoc(final String contig, final long pos) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), pos, pos)); - } - - public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop())); + public GenomeLoc createGenomeLoc(final String contig, final int pos) { + return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), pos, pos)); } /** @@ -505,7 +429,7 @@ public class GenomeLocParser { * @return the genome loc if it's valid, otherwise we throw an exception * */ - private static GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) { + private GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) { if (toReturn.getStart() < 0) { throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0"); } @@ -534,7 +458,7 @@ public class GenomeLocParser { * * @param locus Locus to verify. */ - private static void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) { + private void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) { int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength(); if(locus.getStart() > contigSize) throw new ReviewedStingException(String.format("GenomeLoc is invalid: locus start %d is after the end of contig %s",locus.getStart(),locus.getContig())); @@ -554,8 +478,7 @@ public class GenomeLocParser { * * performs interval-style validation: contig is valid and atart and stop less than the end */ - public static boolean validGenomeLoc(GenomeLoc loc) { - checkSetup(); + public boolean validGenomeLoc(GenomeLoc loc) { // quick check before we get the contig size, is the contig number valid if ((loc.getContigIndex() < 0) || // the contig index has to be positive (loc.getContigIndex() >= contigInfo.getSequences().size())) // the contig must be in the integer range of contigs) @@ -583,9 +506,8 @@ public class GenomeLocParser { * * performs interval-style validation: contig is valid and atart and stop less than the end */ - public static boolean validGenomeLoc(String contig, long start, long stop) { - checkSetup(); - return validGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig, false), start, stop)); + public boolean validGenomeLoc(String contig, int start, int stop) { + return validGenomeLoc(new GenomeLoc(contig, getContigIndex(contig, false), start, stop)); } @@ -600,58 +522,11 @@ public class GenomeLocParser { * * performs interval-style validation: contig is valid and atart and stop less than the end */ - public static boolean validGenomeLoc(int contigIndex, long start, long stop) { - checkSetup(); + public boolean validGenomeLoc(int contigIndex, int start, int stop) { if (contigIndex < 0 || contigIndex >= contigInfo.size()) return false; return validGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop)); } - /** - * Move this Genome loc to the next contig, with a start - * and stop of 1. - * - * @return true if we are not out of contigs, otherwise false if we're - * at the end of the genome (no more contigs to jump to). - */ - public static GenomeLoc toNextContig(GenomeLoc current) { - if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) { - return null; - } else - return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1)); - } - - /** - * create a new genome loc, given an old location and a new contig - * - * @param loc the old location - * @param contig the new contig to set - * - * @return a new genome loc with an updated contig name and index - */ - public static GenomeLoc setContig(GenomeLoc loc, String contig) { - checkSetup(); - - int index = -1; - if ((index = contigInfo.getSequenceIndex(contig)) < 0) { - throw new ReviewedStingException("Contig name ( " + contig + " ) not in the set sequence dictionary."); - } - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, index, loc.start, loc.getStop())); - } - - /** - * Sets contig index. UNSAFE since it 1) does NOT update contig name; 2) does not validate the index - * - * @param contig - */ - public static GenomeLoc setContigIndex(GenomeLoc loc, int contig) { - checkSetup(); - if ((contig >= GenomeLocParser.contigInfo.getSequences().size()) || (contig < 0)) { - throw new ReviewedStingException("Contig index ( " + contig + " ) is not in the sequence dictionary set."); - } - return exceptionOnInvalidGenomeLoc(new GenomeLoc(GenomeLocParser.contigInfo.getSequence(contig).getSequenceName(), contig, loc.start, loc.getStop())); - } - - /** * create a new genome loc from an existing loc, with a new start position * Note that this function will NOT explicitly check the ending offset, in case someone wants to @@ -662,8 +537,7 @@ public class GenomeLocParser { * * @return the newly created genome loc */ - public static GenomeLoc setStart(GenomeLoc loc, long start) { - checkSetup(); + public GenomeLoc setStart(GenomeLoc loc, int start) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop())); } @@ -677,8 +551,7 @@ public class GenomeLocParser { * * @return */ - public static GenomeLoc setStop(GenomeLoc loc, long stop) { - checkSetup(); + public GenomeLoc setStop(GenomeLoc loc, int stop) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop)); } @@ -689,7 +562,7 @@ public class GenomeLocParser { * * @return a new genome loc */ - public static GenomeLoc incPos(GenomeLoc loc) { + public GenomeLoc incPos(GenomeLoc loc) { return incPos(loc, 1); } @@ -701,41 +574,19 @@ public class GenomeLocParser { * * @return a new genome loc */ - public static GenomeLoc incPos(GenomeLoc loc, long by) { + public GenomeLoc incPos(GenomeLoc loc, int by) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by)); } /** - * create a new genome loc with an incremented position - * - * @param loc the location - * - * @return a new genome loc + * Creates a GenomeLoc than spans the entire contig. + * @param contigName Name of the contig. + * @return A locus spanning the entire contig. */ - public static GenomeLoc nextLoc(GenomeLoc loc) { - return incPos(loc); - } - - /** check to make sure that we've setup the contig information */ - private static void checkSetup() { - if (contigInfo == null) { - throw new ReviewedStingException("The GenomeLocParser hasn't been setup with a contig sequence yet"); - } - } - - /** - * compare two contig names, in the current context - * - * @param firstContig - * @param secondContig - * - * @return - */ - public static int compareContigs(String firstContig, String secondContig) { - checkSetup(); - Integer ref1 = GenomeLocParser.getContigIndex(firstContig,true); - Integer ref2 = GenomeLocParser.getContigIndex(secondContig,true); - return ref1.compareTo(ref2); - - } + public GenomeLoc createOverEntireContig(String contigName) { + SAMSequenceRecord contig = contigInfo.getSequence(contigName); + if(contig == null) + throw new ReviewedStingException("Unable to find contig named " + contigName); + return exceptionOnInvalidGenomeLoc(new GenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength())); + } } diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java index fcb73f1ce..eba412e0b 100755 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java @@ -38,25 +38,36 @@ import java.util.*; public class GenomeLocSortedSet extends AbstractSet { private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class); + private GenomeLocParser genomeLocParser; + // our private storage for the GenomeLoc's private List mArray = new ArrayList(); /** default constructor */ - public GenomeLocSortedSet() { + public GenomeLocSortedSet(GenomeLocParser parser) { + this.genomeLocParser = parser; } - public GenomeLocSortedSet(GenomeLoc e) { - this(); + public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) { + this(parser); add(e); } - public GenomeLocSortedSet(Collection l) { - this(); + public GenomeLocSortedSet(GenomeLocParser parser,Collection l) { + this(parser); for ( GenomeLoc e : l ) add(e); } + /** + * Gets the GenomeLocParser used to create this sorted set. + * @return The parser. Will never be null. + */ + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * get an iterator over this collection * @@ -201,7 +212,7 @@ public class GenomeLocSortedSet extends AbstractSet { logger.debug("removeRegions operation: i = " + i); } - return GenomeLocSortedSet.createSetFromList(good); + return createSetFromList(genomeLocParser,good); } private static final List EMPTY_LIST = new ArrayList(); @@ -221,8 +232,8 @@ public class GenomeLocSortedSet extends AbstractSet { * |------| + |--------| * */ - GenomeLoc before = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); - GenomeLoc after = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); + GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1); + GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop()); if (after.getStop() - after.getStart() >= 0) { l.add(after); } @@ -255,9 +266,9 @@ public class GenomeLocSortedSet extends AbstractSet { GenomeLoc n; if (e.getStart() < g.getStart()) { - n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); + n = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop()); } else { - n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); + n = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1); } // replace g with the new region @@ -283,9 +294,10 @@ public class GenomeLocSortedSet extends AbstractSet { * @return the GenomeLocSet of all references sequences as GenomeLoc's */ public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) { - GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(); + GenomeLocParser parser = new GenomeLocParser(dict); + GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(parser); for (SAMSequenceRecord record : dict.getSequences()) { - returnSortedSet.add(GenomeLocParser.createGenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength())); + returnSortedSet.add(parser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength())); } return returnSortedSet; } @@ -297,8 +309,8 @@ public class GenomeLocSortedSet extends AbstractSet { * * @return the sorted genome loc list */ - public static GenomeLocSortedSet createSetFromList(List locs) { - GenomeLocSortedSet set = new GenomeLocSortedSet(); + public static GenomeLocSortedSet createSetFromList(GenomeLocParser parser,List locs) { + GenomeLocSortedSet set = new GenomeLocSortedSet(parser); set.addAll(locs); return set; } @@ -307,13 +319,13 @@ public class GenomeLocSortedSet extends AbstractSet { /** * return a deep copy of this collection. * - * @return a new GenomeLocSortedSet, indentical to the current GenomeLocSortedSet. + * @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet. */ public GenomeLocSortedSet clone() { - GenomeLocSortedSet ret = new GenomeLocSortedSet(); + GenomeLocSortedSet ret = new GenomeLocSortedSet(genomeLocParser); for (GenomeLoc loc : this.mArray) { // ensure a deep copy - ret.mArray.add(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop())); + ret.mArray.add(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop())); } return ret; } diff --git a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index 31ffdaf81..b7ca11f9c 100644 --- a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -20,6 +20,8 @@ public class BedParser { // the buffered reader input private final BufferedReader mIn; + private GenomeLocParser genomeLocParser; + // our array of locations private List mLocations; @@ -28,7 +30,8 @@ public class BedParser { * * @param fl */ - public BedParser(File fl) { + public BedParser(GenomeLocParser genomeLocParser,File fl) { + this.genomeLocParser = genomeLocParser; try { mIn = new BufferedReader(new FileReader(fl)); } catch (FileNotFoundException e) { @@ -57,7 +60,7 @@ public class BedParser { List locArray = new ArrayList(); try { while ((line = mIn.readLine()) != null) { - locArray.add(parseLocation(line)); + locArray.add(parseLocation(genomeLocParser,line)); } } catch (IOException e) { throw new UserException.MalformedFile("Unable to parse line in BED file."); @@ -71,7 +74,7 @@ public class BedParser { * @param line the line, as a string * @return a parsed genome loc */ - public static GenomeLoc parseLocation(String line) { + public static GenomeLoc parseLocation(GenomeLocParser genomeLocParser,String line) { String contig; int start; int stop; @@ -85,7 +88,7 @@ public class BedParser { } // we currently drop the rest of the bed record, which can contain names, scores, etc - return GenomeLocParser.createGenomeLoc(contig, start, stop); + return genomeLocParser.createGenomeLoc(contig, start, stop); } diff --git a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index d2baa46c1..41de5ef96 100644 --- a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -45,7 +45,7 @@ public class DupUtils { } } - public static SAMRecord combineDuplicates(List duplicates, int maxQScore) { + public static SAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List duplicates, int maxQScore) { if ( duplicates.size() == 0 ) return null; @@ -63,7 +63,7 @@ public class DupUtils { //for ( SAMRecord read : duplicates ) { // System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]); //} - Pair baseAndQual = combineBaseProbs(duplicates, i, maxQScore); + Pair baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore); bases[i] = baseAndQual.getFirst(); quals[i] = baseAndQual.getSecond(); } @@ -114,8 +114,8 @@ public class DupUtils { System.out.printf("%n"); } - private static Pair combineBaseProbs(List duplicates, int readOffset, int maxQScore) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0)); + private static Pair combineBaseProbs(GenomeLocParser genomeLocParser,List duplicates, int readOffset, int maxQScore) { + GenomeLoc loc = genomeLocParser.createGenomeLoc(duplicates.get(0)); ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset); final boolean debug = false; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java index c01596ae7..85c505935 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java @@ -114,11 +114,10 @@ public class Haplotype { // Create location for all haplotypes - long startLoc = ref.getWindow().getStart() + startIdxInReference; - long stopLoc = startLoc + haplotypeSize-1; + int startLoc = ref.getWindow().getStart() + startIdxInReference; + int stopLoc = startLoc + haplotypeSize-1; - GenomeLoc locus = GenomeLocParser.createGenomeLoc(ref.getLocus().getContigIndex(),startLoc, - stopLoc); + GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc); for (Allele a : vc.getAlleles()) { diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java index ee5064289..e722ac196 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java +++ b/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; @@ -56,17 +57,17 @@ public class IntervalFileMergingIterator implements Iterator { private IntervalMergingRule myRule; private File myFile; - public IntervalFileMergingIterator(File f, IntervalMergingRule rule) { + public IntervalFileMergingIterator(GenomeLocParser genomeLocParser,File f, IntervalMergingRule rule) { myFile = f; try { XReadLines reader = new XReadLines(f); if (f.getName().toUpperCase().endsWith(".BED")) { - it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( reader.iterator(), + it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(), StringToGenomeLocIteratorAdapter.FORMAT.BED ) ) ; } else { - it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( reader.iterator(), + it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(), StringToGenomeLocIteratorAdapter.FORMAT.GATK ) ) ; } } catch ( FileNotFoundException e ) { diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 9f68ac87a..a2967d455 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -29,7 +29,7 @@ public class IntervalUtils { * @param allowEmptyIntervalList If false instead of an empty interval list will return null. * @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used. */ - public static List parseIntervalArguments(List argList, boolean allowEmptyIntervalList) { + public static List parseIntervalArguments(GenomeLocParser parser, List argList, boolean allowEmptyIntervalList) { List rawIntervals = new ArrayList(); // running list of raw GenomeLocs if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to @@ -51,7 +51,7 @@ public class IntervalUtils { // if it's a file, add items to raw interval list if (isIntervalFile(fileOrInterval)) { try { - rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval, allowEmptyIntervalList)); + rawIntervals.addAll(parser.intervalFileToList(fileOrInterval, allowEmptyIntervalList)); } catch (Exception e) { throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e); @@ -60,7 +60,7 @@ public class IntervalUtils { // otherwise treat as an interval -> parse and add to raw interval list else { - rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval)); + rawIntervals.add(parser.parseGenomeInterval(fileOrInterval)); } } } @@ -121,13 +121,13 @@ public class IntervalUtils { * @param mergingRule A descriptor for the type of merging to perform. * @return A sorted, merged version of the intervals passed in. */ - public static GenomeLocSortedSet sortAndMergeIntervals(List intervals, IntervalMergingRule mergingRule) { + public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List intervals, IntervalMergingRule mergingRule) { // sort raw interval list Collections.sort(intervals); // now merge raw interval list - intervals = GenomeLocParser.mergeIntervalLocations(intervals, mergingRule); + intervals = parser.mergeIntervalLocations(intervals, mergingRule); - return GenomeLocSortedSet.createSetFromList(intervals); + return GenomeLocSortedSet.createSetFromList(parser,intervals); } /** diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index 15583da7a..bfa93e005 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -90,10 +90,6 @@ public class ExtendedEventPileupElement extends PileupElement { public Type getType() { return type; } - public GenomeLoc getLocation() { - return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength); - } - // The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with // a valid base quality. The following code attempts to compensate for that.' diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java index 16293ba62..ce6ca570c 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java @@ -1,9 +1,6 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMRecordIterator; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.*; import java.io.InputStream; import java.io.ByteArrayInputStream; @@ -31,17 +28,23 @@ import org.broadinstitute.sting.utils.GenomeLocParser; */ public class ArtificialSAMFileReader extends SAMFileReader { + /** + * The parser, for GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + /** * Backing data store of reads. */ - private List reads = null; + private final List reads; /** * Construct an artificial SAM file reader. * @param reads Reads to use as backing data source. */ - public ArtificialSAMFileReader(SAMRecord... reads) { + public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) { super( createEmptyInputStream(),true ); + this.genomeLocParser = new GenomeLocParser(sequenceDictionary); this.reads = Arrays.asList(reads); } @@ -50,11 +53,11 @@ public class ArtificialSAMFileReader extends SAMFileReader { */ @Override public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) { - GenomeLoc region = GenomeLocParser.createGenomeLoc(sequence, start, end); + GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end); List coveredSubset = new ArrayList(); for( SAMRecord read: reads ) { - GenomeLoc readPosition = GenomeLocParser.createGenomeLoc(read); + GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read); if( contained && region.containsP(readPosition) ) coveredSubset.add(read); else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read); } diff --git a/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java b/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java index 1d75e00b1..01f1dfe96 100755 --- a/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java @@ -32,11 +32,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser; public class ComparableSAMRecord implements Comparable { private SAMRecord record; - private GenomeLoc loc; public ComparableSAMRecord(SAMRecord record) { this.record = record; - this.loc = GenomeLocParser.createGenomeLoc(record); } public SAMRecord getRecord() { @@ -44,8 +42,8 @@ public class ComparableSAMRecord implements Comparable { } public int compareTo(ComparableSAMRecord o) { - // first sort by start position - int comparison = loc.compareTo(o.loc); + // first sort by start position -- with not coverflow because both are guaranteed to be positive. + int comparison = record.getAlignmentStart() - o.record.getAlignmentStart(); // if the reads have the same start position, we must give a non-zero comparison // (because java Sets often require "consistency with equals") if ( comparison == 0 ) @@ -63,7 +61,7 @@ public class ComparableSAMRecord implements Comparable { return true; ComparableSAMRecord csr = (ComparableSAMRecord)obj; - if ( loc.compareTo(csr.loc) != 0 ) + if(record.getAlignmentStart() != csr.record.getAlignmentStart()) return false; if ( !record.getReadName().equals(csr.getRecord().getReadName()) ) return false; diff --git a/java/test/org/broadinstitute/sting/WalkerTest.java b/java/test/org/broadinstitute/sting/WalkerTest.java index 80562b77a..e8712fcb0 100755 --- a/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/java/test/org/broadinstitute/sting/WalkerTest.java @@ -30,7 +30,6 @@ import org.broad.tribble.index.IndexFactory; import org.broad.tribble.vcf.VCFCodec; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.Utils; @@ -311,8 +310,6 @@ public class WalkerTest extends BaseTest { * @return a pair of file and string lists */ private Pair, List> executeTest(String name, List md5s, List tmpFiles, String args, Class expectedException) { - GenomeLocParserTestUtils.clearSequenceDictionary(); - CommandLineGATK instance = new CommandLineGATK(); String[] command; diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java index 146c11231..ffd31c9cd 100644 --- a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java @@ -69,16 +69,18 @@ public class VariantJEXLContextUnitTest extends BaseTest { private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH; static SAMFileHeader header; + private static GenomeLocParser genomeLocParser; + @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); try { exp = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.createExpression(expression)); } catch (Exception e) { Assert.fail("Unable to create expression" + e.getMessage()); } - snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10); + snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10); } @BeforeMethod @@ -144,7 +146,7 @@ public class VariantJEXLContextUnitTest extends BaseTest { List alleles = Arrays.asList(Aref, T); VariantContext vc = new VariantContext("test", snpLoc.getContig(), snpLoc.getStart(), snpLoc.getStop(), alleles); - return new JEXLMap(Arrays.asList(exp),vc); + return new JEXLMap(genomeLocParser,Arrays.asList(exp),vc); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java index 4665f11af..9807cede4 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java @@ -44,16 +44,16 @@ public class AllLocusViewUnitTest extends LocusViewTemplate { // TODO: Should skip over loci not in the given range. GenomeLoc firstLoc = range.get(0); GenomeLoc lastLoc = range.get(range.size()-1); - GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop()); + GenomeLoc bounds = genomeLocParser.createGenomeLoc(firstLoc.getContig(),firstLoc.getStart(),lastLoc.getStop()); - for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { - GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); + for( int i = bounds.getStart(); i <= bounds.getStop(); i++ ) { + GenomeLoc site = genomeLocParser.createGenomeLoc("chr1",i); AlignmentContext locusContext = allLocusView.next(); Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect"); int expectedReadsAtSite = 0; for( SAMRecord read: reads ) { - if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) { + if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) { Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads"); expectedReadsAtSite++; } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java index 625fc22d9..75716eae6 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java @@ -47,14 +47,14 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate { // TODO: Should skip over loci not in the given range. GenomeLoc firstLoc = range.get(0); GenomeLoc lastLoc = range.get(range.size()-1); - GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop()); + GenomeLoc bounds = genomeLocParser.createGenomeLoc(firstLoc.getContig(),firstLoc.getStart(),lastLoc.getStop()); - for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { - GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); + for( int i = bounds.getStart(); i <= bounds.getStop(); i++ ) { + GenomeLoc site = genomeLocParser.createGenomeLoc("chr1",i); int expectedReadsAtSite = 0; for( SAMRecord read: reads ) { - if( GenomeLocParser.createGenomeLoc(read).containsP(site) ) + if( genomeLocParser.createGenomeLoc(read).containsP(site) ) expectedReadsAtSite++; } @@ -68,7 +68,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate { Assert.assertEquals(locusContext.getReads().size(), expectedReadsAtSite, "Found wrong number of reads at site"); for( SAMRecord read: reads ) { - if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) + if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads"); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java index 26efb0e13..0375e0efd 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java @@ -57,11 +57,15 @@ public class LocusReferenceViewUnitTest extends ReferenceViewTemplate { @Test public void testOverlappingReferenceBases() { - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length()))); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), + sequenceFile.getSequence("chrM").length() - 10, + sequenceFile.getSequence("chrM").length()))); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); - byte[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9)); + byte[] results = view.getReferenceBases(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), + sequenceFile.getSequence("chrM").length() - 10, + sequenceFile.getSequence("chrM").length() + 9)); System.out.printf("results are %s%n", new String(results)); Assert.assertEquals(results.length, 20); for (int x = 0; x < results.length; x++) { @@ -74,16 +78,16 @@ public class LocusReferenceViewUnitTest extends ReferenceViewTemplate { /** Queries outside the bounds of the shard should result in reference context window trimmed at the shard boundary. */ @Test public void testBoundsFailure() { - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, 1, 50))); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), 1, 50))); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); - GenomeLoc locus = GenomeLocParser.createGenomeLoc(0, 50, 51); + GenomeLoc locus = genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), 50, 51); ReferenceContext rc = view.getReferenceContext(locus); Assert.assertTrue(rc.getLocus().equals(locus)); - Assert.assertTrue(rc.getWindow().equals(GenomeLocParser.createGenomeLoc(0,50))); + Assert.assertTrue(rc.getWindow().equals(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(),50))); Assert.assertTrue(rc.getBases().length == 1); } @@ -94,10 +98,10 @@ public class LocusReferenceViewUnitTest extends ReferenceViewTemplate { * @param loc */ protected void validateLocation( GenomeLoc loc ) { - Shard shard = new MockLocusShard(Collections.singletonList(loc)); - GenomeLocusIterator shardIterator = new GenomeLocusIterator(loc); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(loc)); + GenomeLocusIterator shardIterator = new GenomeLocusIterator(genomeLocParser,loc); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, loc, null, sequenceFile, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, loc, null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); while (shardIterator.hasNext()) { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 078978586..e4d478d61 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -16,7 +16,6 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -39,23 +38,23 @@ import java.util.*; public abstract class LocusViewTemplate extends BaseTest { protected static ReferenceSequenceFile sequenceSourceFile = null; + protected GenomeLocParser genomeLocParser = null; @BeforeClass public void setupGenomeLoc() throws FileNotFoundException { - GenomeLocParserTestUtils.clearSequenceDictionary(); sequenceSourceFile = fakeReferenceSequenceFile(); - GenomeLocParser.setupRefContigOrdering(sequenceSourceFile); + genomeLocParser = new GenomeLocParser(sequenceSourceFile); } @Test public void emptyAlignmentContextTest() { SAMRecordIterator iterator = new SAMRecordIterator(); - GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new LocusShard(new SAMDataSource(Collections.emptyList()),Collections.singletonList(shardBounds),Collections.emptyMap()); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5); + Shard shard = new LocusShard(new SAMDataSource(Collections.emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.emptyMap()); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -67,11 +66,11 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 1, 5); SAMRecordIterator iterator = new SAMRecordIterator(read); - GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new MockLocusShard(Collections.singletonList(shardBounds)); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(shardBounds)); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -83,10 +82,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 1, 5); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -97,10 +96,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -111,10 +110,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 3, 7); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -125,10 +124,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 1, 10); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -139,10 +138,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 6, 15); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -154,10 +153,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read2 = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -173,10 +172,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read4 = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -192,10 +191,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read4 = buildSAMRecord("chr1", 5, 9); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -213,10 +212,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read6 = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -241,10 +240,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read01, read02, read03, read04, read05, read06, read07, read08, read09, read10, read11, read12); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java index fe58e166a..9c84c3e18 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java @@ -52,6 +52,7 @@ import java.util.*; * test out the ReadBasedReferenceOrderedView class */ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; private static int startingChr = 1; private static int endingChr = 2; @@ -62,7 +63,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @BeforeMethod @@ -76,15 +77,15 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { for (int x = 1; x < 11; x++) { SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "name", 0, x, 10); } - GenomeLoc start = GenomeLocParser.createGenomeLoc(0, 0, 0); + GenomeLoc start = genomeLocParser.createGenomeLoc(header.getSequenceDictionary().getSequence(0).getSequenceName(), 0, 0); List list = new ArrayList(); - list.add(new RMDDataState(null, new FakePeekingRODIterator(start, "fakeName"))); + list.add(new RMDDataState(null, new FakePeekingRODIterator(genomeLocParser,start, "fakeName"))); ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list)); for (SAMRecord rec : records) { ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec); - Map> map = tracker.getReadOffsetMapping(); - for (Long i : map.keySet()) { + Map> map = tracker.getReadOffsetMapping(); + for (Integer i : map.keySet()) { Assert.assertEquals(map.get(i).size(), 1); } Assert.assertEquals(map.keySet().size(), 10); @@ -96,15 +97,16 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { class FakePeekingRODIterator implements LocationAwareSeekableRODIterator { + private GenomeLocParser genomeLocParser; // current location private GenomeLoc location; private GATKFeature curROD; private final String name; - public FakePeekingRODIterator(GenomeLoc startingLoc, String name) { + public FakePeekingRODIterator(GenomeLocParser genomeLocParser, GenomeLoc startingLoc, String name) { this.name = name; - this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); + this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); } @Override @@ -134,7 +136,7 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator { public RODRecordList next() { System.err.println("Next -> " + location); curROD = new ReadMetaDataTrackerUnitTest.FakeRODatum(location, name); - location = GenomeLocParser.createGenomeLoc(location.getContigIndex(), location.getStart() + 1, location.getStop() + 1); + location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1); FakeRODRecordList list = new FakeRODRecordList(); list.add(curROD); return list; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java index bc9b685cf..b00f7ee6a 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java @@ -70,7 +70,7 @@ public class ReadReferenceViewUnitTest extends ReferenceViewTemplate { final long contigStart = selectedContig.getSequenceLength() - (readLength - overlap - 1); final long contigStop = selectedContig.getSequenceLength() + overlap; - ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null); + ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,genomeLocParser,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); SAMRecord rec = buildSAMRecord(selectedContig.getSequenceName(),(int)contigStart,(int)contigStop); @@ -98,7 +98,7 @@ public class ReadReferenceViewUnitTest extends ReferenceViewTemplate { protected void validateLocation( GenomeLoc loc ) { SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() ); - ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null); + ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,genomeLocParser,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop()); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 87aeffbc5..8227435ae 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -43,17 +43,20 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { * Sequence file. */ private static IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; /** * our track builder */ - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = null; @BeforeClass public void init() throws FileNotFoundException { // sequence seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); + builder = new RMDTrackBuilder(); + builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser); } /** @@ -61,11 +64,11 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { */ @Test public void testNoBindings() { - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); + LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10)); Assert.assertEquals(tracker.getAllRods().size(), 0, "The tracker should not have produced any data"); } @@ -76,14 +79,14 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { public void testSingleBinding() { File file = new File(testDir + "TabularDataTest.dat"); RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file); - ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(track,false); + ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); - LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); + LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); TableFeature datum = tracker.lookup("tableTest",TableFeature.class); Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect"); @@ -100,17 +103,17 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file); - ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(track,false); + ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false); RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file); - ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(track2,false); + ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track2,false); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); - LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); + LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); TableFeature datum1 = tracker.lookup("tableTest1",TableFeature.class); Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect"); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java index 0bb142ef7..c8bcf559b 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java @@ -5,7 +5,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -32,7 +31,8 @@ public abstract class ReferenceViewTemplate extends BaseTest { /** * The fasta, for comparison. */ - protected static IndexedFastaSequenceFile sequenceFile = null; + protected IndexedFastaSequenceFile sequenceFile = null; + protected GenomeLocParser genomeLocParser = null; // // The bulk of sequence retrieval is tested by IndexedFastaSequenceFile, but we'll run a few spot @@ -43,9 +43,8 @@ public abstract class ReferenceViewTemplate extends BaseTest { */ @BeforeClass public void initialize() throws FileNotFoundException { - GenomeLocParserTestUtils.clearSequenceDictionary(); sequenceFile = new IndexedFastaSequenceFile( new File(hg18Reference) ); - GenomeLocParser.setupRefContigOrdering(sequenceFile); + genomeLocParser = new GenomeLocParser(sequenceFile); } /** @@ -53,7 +52,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { */ @Test public void testReferenceStart() { - validateLocation( GenomeLocParser.createGenomeLoc(0,1,25) ); + validateLocation( genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(),1,25) ); } /** @@ -63,9 +62,9 @@ public abstract class ReferenceViewTemplate extends BaseTest { public void testReferenceEnd() { // Test the last 25 bases of the first contig. SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1); - final long contigStart = selectedContig.getSequenceLength() - 24; - final long contigStop = selectedContig.getSequenceLength(); - validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + final int contigStart = selectedContig.getSequenceLength() - 24; + final int contigStop = selectedContig.getSequenceLength(); + validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),contigStart,contigStop) ); } /** @@ -76,7 +75,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { // Test the last 25 bases of the first contig. int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); - validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),1,25) ); + validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),1,25) ); } @@ -88,9 +87,9 @@ public abstract class ReferenceViewTemplate extends BaseTest { // Test the last 25 bases of the first contig. int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); - final long contigStart = selectedContig.getSequenceLength() - 24; - final long contigStop = selectedContig.getSequenceLength(); - validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + final int contigStart = selectedContig.getSequenceLength() - 24; + final int contigStop = selectedContig.getSequenceLength(); + validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),contigStart,contigStop) ); } protected abstract void validateLocation( GenomeLoc loc ); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java index 88e2ddfd0..638b04a92 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java @@ -36,7 +36,7 @@ public class ShardDataProviderUnitTest extends BaseTest { @BeforeMethod public void createProvider() { - provider = new LocusShardDataProvider( null,null,null,null,null,null ); + provider = new LocusShardDataProvider( null,null,null,null,null,null,null ); } /** diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java index f5e90a8be..8b65f0900 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.List; import java.util.Collections; @@ -38,7 +39,7 @@ import java.util.Collections; * @version 0.1 */ public class MockLocusShard extends LocusShard { - public MockLocusShard(final List intervals) { - super(new SAMDataSource(Collections.emptyList()),intervals,null); + public MockLocusShard(final GenomeLocParser genomeLocParser,final List intervals) { + super(new SAMDataSource(Collections.emptyList(),genomeLocParser),intervals,null); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java index c0149580d..837e3735e 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java @@ -40,30 +40,34 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { private RMDTrack rod = null; + private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; + private GenomeLoc testSite1; private GenomeLoc testSite2; private GenomeLoc testSite3; @BeforeClass public void init() throws FileNotFoundException { - File sequenceFile = new File(hg18Reference); - GenomeLocParser.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile)); + seq = new IndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); - testSite1 = GenomeLocParser.createGenomeLoc("chrM",10); - testSite2 = GenomeLocParser.createGenomeLoc("chrM",20); - testSite3 = GenomeLocParser.createGenomeLoc("chrM",30); + testSite1 = genomeLocParser.createGenomeLoc("chrM",10); + testSite2 = genomeLocParser.createGenomeLoc("chrM",20); + testSite3 = genomeLocParser.createGenomeLoc("chrM",30); } @BeforeMethod public void setUp() { File file = new File(testDir + "TabularDataTest.dat"); RMDTrackBuilder builder = new RMDTrackBuilder(); + builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser); rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file); } @Test public void testCreateSingleIterator() { - ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ResourcePool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -84,7 +88,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testCreateMultipleIterators() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. @@ -134,7 +138,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorConservation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -169,7 +173,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorCreation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser, rod, false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java index 96f21e698..373cb6634 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java @@ -52,6 +52,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { private List readers; private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; /** * This function does the setup of our parser, before each method call. @@ -64,7 +65,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { // sequence seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary()); } /** @@ -88,8 +89,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); // the sharding strat. - SAMDataSource data = new SAMDataSource(readers); - ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); + SAMDataSource data = new SAMDataSource(readers,genomeLocParser); + ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000,genomeLocParser); int count = 0; try { @@ -132,8 +133,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); // the sharding strat. - SAMDataSource data = new SAMDataSource(readers); - ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); + SAMDataSource data = new SAMDataSource(readers,genomeLocParser); + ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000,genomeLocParser); ArrayList readcountPerShard = new ArrayList(); ArrayList readcountPerShard2 = new ArrayList(); @@ -174,8 +175,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { count = 0; // the sharding strat. - data = new SAMDataSource(readers); - strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); + data = new SAMDataSource(readers,genomeLocParser); + strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000, genomeLocParser); logger.debug("Pile two:"); try { diff --git a/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java b/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java index ffad08047..166eb8a1d 100644 --- a/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java @@ -43,8 +43,6 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { groupRecord.setAttribute("PL", PLATFORM_PREFIX + (((i-1)%2)+1)); groupRecord.setAttribute("PU", PLATFORM_UNIT_PREFIX + (((i-1)%3)+1)); } - - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); } @Test(expectedExceptions=ReviewedStingException.class) diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java index e81c80b96..dfef86941 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java @@ -72,10 +72,6 @@ public class BoundedReadIteratorUnitTest extends BaseTest { @BeforeMethod public void doForEachTest() throws FileNotFoundException { fl = new ArrayList(); - - // sequence - seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); } diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index f01c4869c..7c8cd922b 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -29,10 +29,12 @@ public class LocusIteratorByStateUnitTest extends BaseTest { private static SAMFileHeader header; private LocusIteratorByState li; + private GenomeLocParser genomeLocParser; + @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @Test @@ -61,7 +63,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { List reads = Arrays.asList(before,during,after); // create the iterator by state with the fake reads and fake records - li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes); + li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes,genomeLocParser); boolean foundExtendedEventPileup = false; while (li.hasNext()) { @@ -113,7 +115,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { List reads = Arrays.asList(before,during,after); // create the iterator by state with the fake reads and fake records - li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes); + li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes,genomeLocParser); boolean foundExtendedEventPileup = false; while (li.hasNext()) { diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java index 1450a975f..2198c461d 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java @@ -56,10 +56,12 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { private static SAMFileHeader header; private Set nameSet; + private GenomeLocParser genomeLocParser; + @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @BeforeMethod @@ -75,7 +77,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getReadOffsetMapping().keySet()) { + for (Integer x : tracker.getReadOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 2); } @@ -89,7 +91,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getReadOffsetMapping().keySet()) { + for (Integer x : tracker.getReadOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1); } @@ -103,8 +105,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping("default"); - for (Long x : map.keySet()) { + Map> map = tracker.getReadOffsetMapping("default"); + for (Integer x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), 1); } @@ -117,8 +119,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping(FakeRODatum.class); - for (Long x : map.keySet()) { + Map> map = tracker.getReadOffsetMapping(FakeRODatum.class); + for (Integer x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), 2); } @@ -136,8 +138,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping(FakeRODatum.class); - for (Long x : map.keySet()) { + Map> map = tracker.getReadOffsetMapping(FakeRODatum.class); + for (Integer x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), y + 2); } @@ -155,8 +157,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping(Fake2RODatum.class); - for (long x : map.keySet()) { + Map> map = tracker.getReadOffsetMapping(Fake2RODatum.class); + for (int x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), 1); } @@ -169,7 +171,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getReadOffsetMapping().keySet()) { + for (Integer x : tracker.getReadOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1); } @@ -182,7 +184,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getContigOffsetMapping().keySet()) { + for (Integer x : tracker.getContigOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getContigOffsetMapping().get(x).size(), 1); } @@ -200,9 +202,9 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { */ private ReadMetaDataTracker getRMDT(int incr, Set names, boolean alternateTypes) { SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10); - TreeMap data = new TreeMap(); + TreeMap data = new TreeMap(); for (int x = 0; x < record.getAlignmentEnd(); x += incr) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(record.getReferenceIndex(), record.getAlignmentStart() + x, record.getAlignmentStart() + x); + GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getReferenceName(), record.getAlignmentStart() + x, record.getAlignmentStart() + x); RODMetaDataContainer set = new RODMetaDataContainer(); int cnt = 0; @@ -213,9 +215,9 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { set.addEntry(new FakeRODatum(loc, name)); cnt++; } - data.put((long) record.getAlignmentStart() + x, set); + data.put(record.getAlignmentStart() + x, set); } - ReadMetaDataTracker tracker = new ReadMetaDataTracker(record, data); + ReadMetaDataTracker tracker = new ReadMetaDataTracker(genomeLocParser, record, data); return tracker; } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java index f95a3b193..0e5744697 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java @@ -53,12 +53,13 @@ import java.util.Map; public class RMDTrackBuilderUnitTest extends BaseTest { private RMDTrackBuilder builder; private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; @BeforeMethod public void setup() { - builder = new RMDTrackBuilder(); seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); + builder = new RMDTrackBuilder(); } @Test diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java index 8f6d16782..d9b30cdf3 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java @@ -33,15 +33,19 @@ public class FlashBackIteratorUnitTest extends BaseTest { private static final int STARTING_CHROMOSOME = 1; private static final int CHROMOSOME_SIZE = 1000; + private String firstContig; + private GenomeLocParser genomeLocParser; + @BeforeMethod public void setup() { - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + firstContig = header.getSequenceDictionary().getSequence(0).getSequenceName(); } @Test public void testBasicIteration() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -55,8 +59,8 @@ public class FlashBackIteratorUnitTest extends BaseTest { @Test public void testBasicIterationThenFlashBack() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -66,13 +70,13 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 2)); + iter.flashBackTo(genomeLocParser.createGenomeLoc(firstContig, 2)); } @Test public void testBasicIterationThenFlashBackThenIterate() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -82,7 +86,7 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 1)); + iter.flashBackTo(genomeLocParser.createGenomeLoc(firstContig, 1)); int count = 0; while (iter.hasNext()) { count++; @@ -94,8 +98,8 @@ public class FlashBackIteratorUnitTest extends BaseTest { @Test public void testFlashBackTruth() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - LocationAwareSeekableRODIterator backIter = new FakeSeekableRODIterator(loc); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + LocationAwareSeekableRODIterator backIter = new FakeSeekableRODIterator(genomeLocParser,loc); // remove the first three records backIter.next(); backIter.next(); @@ -110,16 +114,16 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - Assert.assertTrue(iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 5))); - Assert.assertTrue(iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 15))); - Assert.assertTrue(!iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 2))); - Assert.assertTrue(!iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 1))); + Assert.assertTrue(iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 5))); + Assert.assertTrue(iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 15))); + Assert.assertTrue(!iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 2))); + Assert.assertTrue(!iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 1))); } @Test public void testBasicIterationThenFlashBackHalfWayThenIterate() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -129,7 +133,7 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 5)); + iter.flashBackTo(genomeLocParser.createGenomeLoc(firstContig, 5)); int count = 0; while (iter.hasNext()) { count++; @@ -141,15 +145,16 @@ public class FlashBackIteratorUnitTest extends BaseTest { class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator { + private GenomeLocParser genomeLocParser; // current location private GenomeLoc location; private FakeRODatum curROD; private int recordCount = 10; - public FakeSeekableRODIterator(GenomeLoc startingLoc) { - this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); - ; + public FakeSeekableRODIterator(GenomeLocParser genomeLocParser,GenomeLoc startingLoc) { + this.genomeLocParser = genomeLocParser; + this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); } @Override @@ -178,7 +183,7 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator { public RODRecordList next() { RODRecordList list = new FakeRODRecordList(); curROD = new FakeRODatum("STUPIDNAME", location); - location = GenomeLocParser.createGenomeLoc(location.getContigIndex(), location.getStart() + 1, location.getStop() + 1); + location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1); list.add(curROD); recordCount--; return list; diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java index b60e81026..e8e3631d5 100644 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -52,16 +53,22 @@ public class TraverseDuplicatesUnitTest extends BaseTest { private TraverseDuplicates obj = new TraverseDuplicates(); private SAMFileHeader header; - + private GenomeLocParser genomeLocParser; + private GenomeAnalysisEngine engine; @BeforeMethod public void doBefore() { header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser =new GenomeLocParser(header.getSequenceDictionary()); + + engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + obj.initialize(engine); } @Test - public void testAllDupplicatesNoPairs() { + public void testAllDuplicatesNoPairs() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ" + x, 0, 1, 100); @@ -74,7 +81,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testNoDupplicatesNoPairs() { + public void testNoDuplicatesNoPairs() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ" + x, 0, 1, 100); @@ -102,7 +109,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testAllDupplicatesAllPairs() { + public void testAllDuplicatesAllPairs() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ"+ x, 0, 1, 100); @@ -118,7 +125,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testNoDupplicatesAllPairs() { + public void testNoDuplicatesAllPairs() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ"+ x, 0, 1, 100); @@ -134,7 +141,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testAllDupplicatesAllPairsDifferentPairedEnd() { + public void testAllDuplicatesAllPairsDifferentPairedEnd() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ" + x, 0, 1, 100); diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index a44b9d44d..9d642519b 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; @@ -17,7 +18,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import static org.testng.Assert.fail; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -68,12 +68,16 @@ public class TraverseReadsUnitTest extends BaseTest { private TraverseReads traversalEngine = null; private IndexedFastaSequenceFile ref = null; + private GenomeLocParser genomeLocParser = null; + private GenomeAnalysisEngine engine = null; @BeforeClass public void doOnce() { - GenomeLocParserTestUtils.clearSequenceDictionary(); ref = new IndexedFastaSequenceFile(refFile); - GenomeLocParser.setupRefContigOrdering(ref); + genomeLocParser = new GenomeLocParser(ref); + + engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); } /** @@ -99,17 +103,17 @@ public class TraverseReadsUnitTest extends BaseTest { countReadWalker = new CountReadsWalker(); traversalEngine = new TraverseReads(); - - + traversalEngine.initialize(engine); } /** Test out that we can shard the file and iterate over every read */ @Test public void testUnmappedReadCount() { - SAMDataSource dataSource = new SAMDataSource(bamList); + SAMDataSource dataSource = new SAMDataSource(bamList,genomeLocParser); ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, ref.getSequenceDictionary(), - readSize); + readSize, + genomeLocParser); countReadWalker.initialize(); Object accumulator = countReadWalker.reduceInit(); @@ -121,7 +125,7 @@ public class TraverseReadsUnitTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null); + ShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null,null); accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); dataProvider.close(); } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java index ccdb57a87..71774c7da 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java @@ -20,67 +20,45 @@ import org.testng.annotations.Test; * Test out the functionality of the new genome loc parser */ public class GenomeLocParserUnitTest extends BaseTest { - @Test(expectedExceptions=ReviewedStingException.class) - public void testUnsetupException() { - SAMSequenceDictionary contigInfoCache = GenomeLocParser.contigInfo; - GenomeLocParser.contigInfo = null; - try { - GenomeLocParser.createGenomeLoc(0, 0, 0); - } - finally { - GenomeLocParser.contigInfo = contigInfoCache; - } - } + private GenomeLocParser genomeLocParser; @BeforeClass public void init() { - GenomeLocParserTestUtils.clearSequenceDictionary(); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - } - - @Test - public void testKnownContigOrder() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - GenomeLocParser.contigInfo = null; - // assert that it's false when the contig ordering is not setup - assertTrue(!GenomeLocParser.hasKnownContigOrdering()); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - // assert that it's true when it is setup - assertTrue(GenomeLocParser.hasKnownContigOrdering()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @Test(expectedExceptions=RuntimeException.class) public void testGetContigIndex() { - assertEquals(GenomeLocParser.getContigIndex("blah",true), -1); // should not be in the reference + assertEquals(genomeLocParser.getContigIndex("blah",true), -1); // should not be in the reference } @Test public void testGetContigIndexValid() { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - assertEquals(GenomeLocParser.getContigIndex("chr1",true), 0); // should be in the reference + assertEquals(genomeLocParser.getContigIndex("chr1",true), 0); // should be in the reference } @Test public void testGetContigInfoUnknownContig() { - assertEquals(null, GenomeLocParser.getContigInfo("blah")); // should be in the reference + assertEquals(null, genomeLocParser.getContigInfo("blah")); // should be in the reference } @Test public void testGetContigInfoKnownContig() { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - assertEquals(0, "chr1".compareTo(GenomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference + assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference } @Test(expectedExceptions=ReviewedStingException.class) public void testParseBadString() { - GenomeLocParser.parseGenomeLoc("Bad:0-1"); + genomeLocParser.parseGenomeLoc("Bad:0-1"); } @Test public void testParseGoodString() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-100"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-100"); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 100); assertEquals(loc.getStart(), 1); @@ -88,7 +66,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc1() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 100); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 100); assertEquals(loc.getStart(), 1); @@ -96,7 +74,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc1point5() { // in honor of VAAL! - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1"); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 1); assertEquals(loc.getStart(), 1); @@ -104,23 +82,23 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc2() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100); - assertEquals(0, loc.getContigIndex()); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100); + assertEquals("chr1", loc.getContig()); assertEquals(loc.getStop(), 100); assertEquals(loc.getStart(), 1); } @Test public void testCreateGenomeLoc3() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1); - assertEquals(0, loc.getContigIndex()); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1); + assertEquals("chr1", loc.getContig()); assertEquals(loc.getStop(), 1); assertEquals(loc.getStart(), 1); } @Test public void testCreateGenomeLoc4() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 1); assertEquals(loc.getStart(), 1); @@ -128,8 +106,8 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc5() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100); - GenomeLoc copy = GenomeLocParser.createGenomeLoc(loc); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100); + GenomeLoc copy = genomeLocParser.createGenomeLoc(loc.getContig(),loc.getStart(),loc.getStop()); assertEquals(0, copy.getContigIndex()); assertEquals(copy.getStop(), 100); assertEquals(copy.getStart(), 1); @@ -137,7 +115,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testGenomeLocPlusSign() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1+"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1+"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -145,7 +123,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testGenomeLocParseOnlyChrome() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -153,7 +131,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocParseOnlyBadChrome() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr12"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr12"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -161,7 +139,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocBad() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -169,7 +147,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocBad2() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-500-0"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-500-0"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -177,7 +155,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocBad3() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1--0"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1--0"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -186,19 +164,19 @@ public class GenomeLocParserUnitTest extends BaseTest { // test out the validating methods @Test public void testValidationOfGenomeLocs() { - assertTrue(GenomeLocParser.validGenomeLoc("chr1",1,1)); - assertTrue(!GenomeLocParser.validGenomeLoc("chr2",1,1)); // shouldn't have an entry - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",1,11)); // past the end of the contig - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",-1,10)); // bad start - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",1,-2)); // bad stop - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",10,11)); // bad start, past end + assertTrue(genomeLocParser.validGenomeLoc("chr1",1,1)); + assertTrue(!genomeLocParser.validGenomeLoc("chr2",1,1)); // shouldn't have an entry + assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,11)); // past the end of the contig + assertTrue(!genomeLocParser.validGenomeLoc("chr1",-1,10)); // bad start + assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,-2)); // bad stop + assertTrue(!genomeLocParser.validGenomeLoc("chr1",10,11)); // bad start, past end - assertTrue(GenomeLocParser.validGenomeLoc(0,1,1)); - assertTrue(!GenomeLocParser.validGenomeLoc(1,1,1)); // shouldn't have an entry - assertTrue(!GenomeLocParser.validGenomeLoc(0,1,11)); // past the end of the contig - assertTrue(!GenomeLocParser.validGenomeLoc(-1,0,10)); // bad start - assertTrue(!GenomeLocParser.validGenomeLoc(0,1,-2)); // bad stop - assertTrue(!GenomeLocParser.validGenomeLoc(0,10,11)); // bad start, past end + assertTrue(genomeLocParser.validGenomeLoc(0,1,1)); + assertTrue(!genomeLocParser.validGenomeLoc(1,1,1)); // shouldn't have an entry + assertTrue(!genomeLocParser.validGenomeLoc(0,1,11)); // past the end of the contig + assertTrue(!genomeLocParser.validGenomeLoc(-1,0,10)); // bad start + assertTrue(!genomeLocParser.validGenomeLoc(0,1,-2)); // bad stop + assertTrue(!genomeLocParser.validGenomeLoc(0,10,11)); // bad start, past end } } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java index fc7099b7b..8e4ff8a2c 100755 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java @@ -48,20 +48,23 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { private static final int STARTING_CHROMOSOME = 1; private static final int CHROMOSOME_SIZE = 1000; + private GenomeLocParser genomeLocParser; + private String contigOneName; + @BeforeClass public void setup() { - GenomeLocParserTestUtils.clearSequenceDictionary(); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + contigOneName = header.getSequenceDictionary().getSequence(1).getSequenceName(); } @BeforeMethod public void initializeSortedSet() { - mSortedSet = new GenomeLocSortedSet(); + mSortedSet = new GenomeLocSortedSet(genomeLocParser); } @Test public void testAdd() { - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -70,7 +73,7 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void testRemove() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.remove(g); @@ -80,9 +83,9 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void addRegion() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 1, 50); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 1, 50); mSortedSet.add(g); - GenomeLoc f = GenomeLocParser.createGenomeLoc(1, 30, 80); + GenomeLoc f = genomeLocParser.createGenomeLoc(contigOneName, 30, 80); mSortedSet.addRegion(f); assertTrue(mSortedSet.size() == 1); @@ -92,7 +95,7 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testAddDuplicate() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.add(g); @@ -100,8 +103,8 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void mergingOverlappingBelow() { - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 50); - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 49, 100); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 50); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -116,8 +119,8 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void mergingOverlappingAbove() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -132,22 +135,22 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void deleteAllByRegion() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100); mSortedSet.add(e); for (int x = 1; x < 101; x++) { - GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x); - mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(del)); + GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName,x,x); + mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser,del)); } assertTrue(mSortedSet.isEmpty()); } @Test public void deleteSomeByRegion() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100); mSortedSet.add(e); for (int x = 1; x < 50; x++) { - GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x); - mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(del)); + GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName,x,x); + mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser,del)); } assertTrue(!mSortedSet.isEmpty()); assertTrue(mSortedSet.size() == 1); @@ -159,14 +162,14 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void deleteSuperRegion() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 10, 20); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 70, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 10, 20); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 70, 100); mSortedSet.add(g); mSortedSet.addRegion(e); assertTrue(mSortedSet.size() == 2); // now delete a region - GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 15, 75); - mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(d)); + GenomeLoc d = genomeLocParser.createGenomeLoc(contigOneName, 15, 75); + mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser,d)); Iterator iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); assertTrue(loc.getStart() == 10); @@ -181,13 +184,13 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void substractComplexExample() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 20); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 20); mSortedSet.add(e); - GenomeLoc r1 = GenomeLocParser.createGenomeLoc(1, 3, 5); - GenomeLoc r2 = GenomeLocParser.createGenomeLoc(1, 10, 12); - GenomeLoc r3 = GenomeLocParser.createGenomeLoc(1, 16, 18); - GenomeLocSortedSet toExclude = new GenomeLocSortedSet(Arrays.asList(r1, r2, r3)); + GenomeLoc r1 = genomeLocParser.createGenomeLoc(contigOneName, 3, 5); + GenomeLoc r2 = genomeLocParser.createGenomeLoc(contigOneName, 10, 12); + GenomeLoc r3 = genomeLocParser.createGenomeLoc(contigOneName, 16, 18); + GenomeLocSortedSet toExclude = new GenomeLocSortedSet(genomeLocParser,Arrays.asList(r1, r2, r3)); GenomeLocSortedSet remaining = mSortedSet.subtractRegions(toExclude); // logger.debug("Initial " + mSortedSet); @@ -204,10 +207,10 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { GenomeLoc p3 = it.next(); GenomeLoc p4 = it.next(); - assertEquals(GenomeLocParser.createGenomeLoc(1, 1, 2), p1); - assertEquals(GenomeLocParser.createGenomeLoc(1, 6, 9), p2); - assertEquals(GenomeLocParser.createGenomeLoc(1, 13, 15), p3); - assertEquals(GenomeLocParser.createGenomeLoc(1, 19, 20), p4); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 1, 2), p1); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 6, 9), p2); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 13, 15), p3); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 19, 20), p4); } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java index 27e694bf4..81d6446bf 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java @@ -21,13 +21,13 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; */ public class GenomeLocUnitTest extends BaseTest { private static ReferenceSequenceFile seq; + private GenomeLocParser genomeLocParser; @BeforeClass public void init() throws FileNotFoundException { // sequence - GenomeLocParserTestUtils.clearSequenceDictionary(); seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } /** @@ -37,10 +37,10 @@ public class GenomeLocUnitTest extends BaseTest { public void testIsBetween() { logger.warn("Executing testIsBetween"); - GenomeLoc locMiddle = GenomeLocParser.createGenomeLoc("chr1", 3, 3); + GenomeLoc locMiddle = genomeLocParser.createGenomeLoc("chr1", 3, 3); - GenomeLoc locLeft = GenomeLocParser.createGenomeLoc("chr1", 1, 1); - GenomeLoc locRight = GenomeLocParser.createGenomeLoc("chr1", 5, 5); + GenomeLoc locLeft = genomeLocParser.createGenomeLoc("chr1", 1, 1); + GenomeLoc locRight = genomeLocParser.createGenomeLoc("chr1", 5, 5); Assert.assertTrue(locMiddle.isBetween(locLeft, locRight)); Assert.assertFalse(locLeft.isBetween(locMiddle, locRight)); @@ -50,15 +50,15 @@ public class GenomeLocUnitTest extends BaseTest { @Test public void testContigIndex() { logger.warn("Executing testContigIndex"); - GenomeLoc locOne = GenomeLocParser.createGenomeLoc("chr1",1,1); + GenomeLoc locOne = genomeLocParser.createGenomeLoc("chr1",1,1); Assert.assertEquals(1, locOne.getContigIndex()); Assert.assertEquals("chr1", locOne.getContig()); - GenomeLoc locX = GenomeLocParser.createGenomeLoc("chrX",1,1); + GenomeLoc locX = genomeLocParser.createGenomeLoc("chrX",1,1); Assert.assertEquals(23, locX.getContigIndex()); Assert.assertEquals("chrX", locX.getContig()); - GenomeLoc locNumber = GenomeLocParser.createGenomeLoc(1,1,1); + GenomeLoc locNumber = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(),1,1); Assert.assertEquals(1, locNumber.getContigIndex()); Assert.assertEquals("chr1", locNumber.getContig()); Assert.assertEquals(0, locOne.compareTo(locNumber)); @@ -68,15 +68,15 @@ public class GenomeLocUnitTest extends BaseTest { @Test public void testCompareTo() { logger.warn("Executing testCompareTo"); - GenomeLoc twoOne = GenomeLocParser.createGenomeLoc("chr2", 1); - GenomeLoc twoFive = GenomeLocParser.createGenomeLoc("chr2", 5); - GenomeLoc twoOtherFive = GenomeLocParser.createGenomeLoc("chr2", 5); + GenomeLoc twoOne = genomeLocParser.createGenomeLoc("chr2", 1); + GenomeLoc twoFive = genomeLocParser.createGenomeLoc("chr2", 5); + GenomeLoc twoOtherFive = genomeLocParser.createGenomeLoc("chr2", 5); Assert.assertEquals(twoFive.compareTo(twoOtherFive), 0); Assert.assertEquals(twoOne.compareTo(twoFive), -1); Assert.assertEquals(twoFive.compareTo(twoOne), 1); - GenomeLoc oneOne = GenomeLocParser.createGenomeLoc("chr1", 5); + GenomeLoc oneOne = genomeLocParser.createGenomeLoc("chr1", 5); Assert.assertEquals(oneOne.compareTo(twoOne), -1); Assert.assertEquals(twoOne.compareTo(oneOne), 1); } diff --git a/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java b/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java index 364c11351..8a470303d 100644 --- a/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.bed; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -18,25 +17,25 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; public class BedParserUnitTest extends BaseTest { private static IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; private File bedFile = new File("testdata/sampleBedFile.bed"); @BeforeClass public void beforeTests() { - GenomeLocParserTestUtils.clearSequenceDictionary(); seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } @Test public void testLoadBedFile() { - BedParser parser = new BedParser(bedFile); + BedParser parser = new BedParser(genomeLocParser,bedFile); List location = parser.getLocations(); Assert.assertEquals(location.size(), 4); } @Test public void testBedParsing() { - BedParser parser = new BedParser(bedFile); + BedParser parser = new BedParser(genomeLocParser,bedFile); List location = parser.getLocations(); Assert.assertEquals(location.size(), 4); Assert.assertTrue(location.get(0).getContig().equals("20")); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java index cc8c61948..c2b622738 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.genotype.glf; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; @@ -59,6 +58,9 @@ public class GLFWriterUnitTest extends BaseTest { protected static final String[] genotypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"}; protected final static double SIGNIFICANCE = 5.1; + private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; + @BeforeMethod public void before() { @@ -66,10 +68,8 @@ public class GLFWriterUnitTest extends BaseTest { @BeforeClass public void beforeTests() { - GenomeLocParserTestUtils.clearSequenceDictionary(); - IndexedFastaSequenceFile seq; seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } @@ -113,7 +113,7 @@ public class GLFWriterUnitTest extends BaseTest { rec = new GLFWriter(writeTo); rec.writeHeader(header); for (int x = 0; x < 100; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + GenomeLoc loc = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(), x + 1); rec.addCall(new SAMSequenceRecord("test", 0), (int)loc.getStart(), 10, 'A', 9, createLikelihoods(x % 10)); } rec.close(); @@ -131,7 +131,7 @@ public class GLFWriterUnitTest extends BaseTest { rec = new GLFWriter(writeTo); rec.writeHeader(header); for (int x = 0; x < 5; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + GenomeLoc loc = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(), x + 1); rec.addCall(new SAMSequenceRecord("test", 0), (int)loc.getStart(), 10, 'A', 9, createGreaterThan255MinimumGenotype(x % 10)); } rec.close(); @@ -150,7 +150,7 @@ public class GLFWriterUnitTest extends BaseTest { rec = new GLFWriter(writeTo); rec.writeHeader(header); for (int x = 0; x < 100; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + GenomeLoc loc = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(), x + 1); rec.addCall(new SAMSequenceRecord("test", 0), (int)loc.getStart(), 10, 'A', 9, createLikelihoods(x % 10)); } rec.close(); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index b0602e6f6..4f3a43e4d 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -6,7 +6,6 @@ import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -36,12 +35,12 @@ public class VCFWriterUnitTest extends BaseTest { private Set metaData = new HashSet(); private Set additionalColumns = new HashSet(); private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf"); + private GenomeLocParser genomeLocParser; @BeforeClass public void beforeTests() { - GenomeLocParserTestUtils.clearSequenceDictionary(); IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } /** test, using the writer and reader, that we can output and input a VCF file without problems */ @@ -110,7 +109,7 @@ public class VCFWriterUnitTest extends BaseTest { */ private VariantContext createVC(VCFHeader header) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1",1); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1",1); List alleles = new ArrayList(); Set filters = null; Map attributes = new HashMap(); diff --git a/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java b/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java index e48e3e4c0..752695052 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java @@ -55,23 +55,25 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { private static List results1 = null; private static List results2 = null; + private GenomeLocParser genomeLocParser; + @BeforeClass public void init() { - GenomeLocParser.setupRefContigOrdering(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); + genomeLocParser = new GenomeLocParser(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); results1 = new ArrayList(); results2 = new ArrayList(); - results1.add(GenomeLocParser.createGenomeLoc("chr1",1554)); - results1.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); - results1.add(GenomeLocParser.createGenomeLoc("chr1",18932,19000)); - results1.add(GenomeLocParser.createGenomeLoc("chr1",19001,25000)); - results1.add(GenomeLocParser.createGenomeLoc("chr5",7415,7600)); + results1.add(genomeLocParser.createGenomeLoc("chr1",1554)); + results1.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); + results1.add(genomeLocParser.createGenomeLoc("chr1",18932,19000)); + results1.add(genomeLocParser.createGenomeLoc("chr1",19001,25000)); + results1.add(genomeLocParser.createGenomeLoc("chr5",7415,7600)); - results2.add(GenomeLocParser.createGenomeLoc("chr1",1554)); - results2.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); - results2.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); - results2.add(GenomeLocParser.createGenomeLoc("chr5",7415,7600)); + results2.add(genomeLocParser.createGenomeLoc("chr1",1554)); + results2.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); + results2.add(genomeLocParser.createGenomeLoc("chr1",18932,25000)); + results2.add(genomeLocParser.createGenomeLoc("chr5",7415,7600)); } @@ -79,7 +81,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testGATKIntervalFileIterator_Overlap() { logger.warn("Executing testGATKIntervalFileIterator_Overlap"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); + Iterator it = new IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); Iterator check_it = results1.iterator(); while(it.hasNext()) { GenomeLoc l = it.next(); @@ -93,7 +95,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testGATKIntervalFileIterator_OverlapWithException() { logger.warn("Executing testGATKIntervalFileIterator_OverlapWithException"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); + Iterator it = new IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); Iterator check_it = results1.iterator(); try { while(it.hasNext()) { @@ -110,7 +112,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testGATKIntervalFileIterator_All() { logger.warn("Executing testGATKIntervalFileIterator_All"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameGATK),IntervalMergingRule.ALL); + Iterator it = new IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameGATK),IntervalMergingRule.ALL); Iterator check_it = results2.iterator(); while(it.hasNext()) { GenomeLoc l = it.next(); @@ -124,7 +126,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testBEDIntervalFileIterator_Overlap() { logger.warn("Executing testBEDIntervalFileIterator_Overlap"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameBED),IntervalMergingRule.OVERLAPPING_ONLY); + Iterator it = new IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameBED),IntervalMergingRule.OVERLAPPING_ONLY); Iterator check_it = results1.iterator(); while(it.hasNext()) { GenomeLoc l = it.next(); diff --git a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java index a74cc54e7..d4fcb8b9f 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java +++ b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java @@ -21,13 +21,12 @@ import java.util.List; public class IntervalUtilsTest extends BaseTest { // used to seed the genome loc parser with a sequence dictionary private static ReferenceSequenceFile seq; - - + private GenomeLocParser genomeLocParser; @BeforeClass public void init() throws FileNotFoundException { seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } @@ -40,9 +39,9 @@ public class IntervalUtilsTest extends BaseTest { // create the two lists we'll use for (int x = 1; x < 101; x++) { if (x % 2 == 0) - listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x)); else - listEveryTwoFromOne.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromOne.add(genomeLocParser.createGenomeLoc("chr1",x,x)); } List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION); @@ -60,8 +59,8 @@ public class IntervalUtilsTest extends BaseTest { // create the two lists we'll use for (int x = 1; x < 101; x++) { if (x % 2 == 0) - listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); - allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x)); + allSites.add(genomeLocParser.createGenomeLoc("chr1",x,x)); } List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION); @@ -79,8 +78,8 @@ public class IntervalUtilsTest extends BaseTest { // create the two lists we'll use for (int x = 1; x < 101; x++) { if (x % 5 == 0) { - listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); - allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x)); + allSites.add(genomeLocParser.createGenomeLoc("chr1",x,x)); } } diff --git a/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java b/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java index 63d84f082..0b4e52a3d 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java @@ -47,6 +47,7 @@ import java.io.File; public class NwayIntervalMergingIteratorUnitTest extends BaseTest { private static File refFile = new File(validationDataLocation + "Homo_sapiens_assembly17.fasta"); + private GenomeLocParser genomeLocParser; private static List stream1 = null; private static List stream2 = null; @@ -54,32 +55,32 @@ public class NwayIntervalMergingIteratorUnitTest extends BaseTest { @BeforeClass public static void init() { - GenomeLocParser.setupRefContigOrdering(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); + GenomeLocParser genomeLocParser = new GenomeLocParser(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); stream1 = new ArrayList(); stream2 = new ArrayList(); expected = new ArrayList(); - stream1.add(GenomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",2600,2610)); // 4 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",2609,2625)); // 4 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",18932,19000)); // 6 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",19001,25000)); //6 + stream1.add(genomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 + stream1.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 + stream1.add(genomeLocParser.createGenomeLoc("chr1",2600,2610)); // 4 + stream1.add(genomeLocParser.createGenomeLoc("chr1",2609,2625)); // 4 + stream1.add(genomeLocParser.createGenomeLoc("chr1",18932,19000)); // 6 + stream1.add(genomeLocParser.createGenomeLoc("chr1",19001,25000)); //6 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",2598,2604)); // 4 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 + stream2.add(genomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 + stream2.add(genomeLocParser.createGenomeLoc("chr1",2598,2604)); // 4 + stream2.add(genomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 + stream2.add(genomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 + stream2.add(genomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 - expected.add(GenomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 - expected.add(GenomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 - expected.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 - expected.add(GenomeLocParser.createGenomeLoc("chr1",2598,2625)); // 4 - expected.add(GenomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 - expected.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 - expected.add(GenomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 + expected.add(genomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 + expected.add(genomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 + expected.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 + expected.add(genomeLocParser.createGenomeLoc("chr1",2598,2625)); // 4 + expected.add(genomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 + expected.add(genomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 + expected.add(genomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 } diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala index a760b220b..f444044bf 100644 --- a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala @@ -60,13 +60,13 @@ class IntervalScatterFunction extends ScatterFunction with InProcessFunction { object IntervalScatterFunction { private def parseLocs(referenceSource: ReferenceDataSource, intervals: List[String]) = { - GenomeLocParser.setupRefContigOrdering(referenceSource.getReference) + var genomeLocParser: GenomeLocParser = new GenomeLocParser(referenceSource.getReference) val locs = { // TODO: Abstract genome analysis engine has richer logic for parsing. We need to use it! if (intervals.size == 0) { GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSource.getReference.getSequenceDictionary) } else { - new GenomeLocSortedSet(IntervalUtils.parseIntervalArguments(intervals, false)) + new GenomeLocSortedSet(genomeLocParser,IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, false)) } } if (locs == null || locs.size == 0) diff --git a/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala index 1998b6c76..19c34071e 100755 --- a/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala @@ -13,8 +13,8 @@ class PipelineUtils { object PipelineUtils{ def smartSplitContigs(reference: File, intervals: File, sets: Int) : List[List[String]] = { - GenomeLocParser.setupRefContigOrdering(ReferenceSequenceFileFactory.getReferenceSequenceFile(reference)) - val targets = IntervalUtils.parseIntervalArguments(List(intervals.getAbsolutePath), false) + var genomeLocParser: GenomeLocParser = new GenomeLocParser(ReferenceSequenceFileFactory.getReferenceSequenceFile(reference)) + val targets = IntervalUtils.parseIntervalArguments(genomeLocParser,List(intervals.getAbsolutePath), false) // Build up a map of contigs with sizes. var contigSizes = Map.empty[String, Long] diff --git a/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala b/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala index 42ac83aa4..bcd2e254e 100644 --- a/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala +++ b/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala @@ -8,17 +8,17 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils import org.broadinstitute.sting.queue.QException import net.sf.picard.reference.IndexedFastaSequenceFile import org.testng.annotations.{Test, BeforeMethod} -import org.broadinstitute.sting.utils.{GenomeLocParserTestUtils, GenomeLocParser} +import org.broadinstitute.sting.utils.GenomeLocParser class IntervalScatterFunctionUnitTest extends BaseTest { private def reference = new File(BaseTest.b36KGReference) private var header: IndexedFastaSequenceFile = _ + private var genomeLocParser: GenomeLocParser = _ @BeforeMethod def setup() { - GenomeLocParserTestUtils.clearSequenceDictionary() header = new IndexedFastaSequenceFile(reference) - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()) + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()) } @Test @@ -30,17 +30,17 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testBasicScatter = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") val files = (1 to 3).toList.map(index => new File(testDir + "basic." + index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3"), files, false) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -53,18 +53,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterLessFiles = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") - val chr4 = GenomeLocParser.parseGenomeInterval("4") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") + val chr4 = genomeLocParser.parseGenomeInterval("4") val files = (1 to 3).toList.map(index => new File(testDir + "less." + index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3", "4"), files, false) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(2, locs1.size) Assert.assertEquals(1, locs2.size) @@ -85,18 +85,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterIntervals = { val intervals = List("1:1-2", "1:4-5", "2:1-1", "3:2-2") - val chr1a = GenomeLocParser.parseGenomeInterval("1:1-2") - val chr1b = GenomeLocParser.parseGenomeInterval("1:4-5") - val chr2 = GenomeLocParser.parseGenomeInterval("2:1-1") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1a = genomeLocParser.parseGenomeInterval("1:1-2") + val chr1b = genomeLocParser.parseGenomeInterval("1:4-5") + val chr2 = genomeLocParser.parseGenomeInterval("2:1-1") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "split." + index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(2, locs1.size) Assert.assertEquals(1, locs2.size) @@ -111,17 +111,17 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterOrder = { val intervals = List("2:1-1", "1:1-1", "3:2-2") - val chr1 = GenomeLocParser.parseGenomeInterval("1:1-1") - val chr2 = GenomeLocParser.parseGenomeInterval("2:1-1") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1 = genomeLocParser.parseGenomeInterval("1:1-1") + val chr2 = genomeLocParser.parseGenomeInterval("2:1-1") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "split." + index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -134,17 +134,17 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testBasicScatterByContig = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") val files = (1 to 3).toList.map(index => new File(testDir + "contig_basic." + index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3"), files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -157,18 +157,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigLessFiles = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") - val chr4 = GenomeLocParser.parseGenomeInterval("4") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") + val chr4 = genomeLocParser.parseGenomeInterval("4") val files = (1 to 3).toList.map(index => new File(testDir + "contig_less." + index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3", "4"), files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -189,18 +189,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigIntervalsStart = { val intervals = List("1:1-2", "1:4-5", "2:1-1", "3:2-2") - val chr1a = GenomeLocParser.parseGenomeInterval("1:1-2") - val chr1b = GenomeLocParser.parseGenomeInterval("1:4-5") - val chr2 = GenomeLocParser.parseGenomeInterval("2:1-1") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1a = genomeLocParser.parseGenomeInterval("1:1-2") + val chr1b = genomeLocParser.parseGenomeInterval("1:4-5") + val chr2 = genomeLocParser.parseGenomeInterval("2:1-1") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_start." + index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(2, locs1.size) Assert.assertEquals(1, locs2.size) @@ -215,18 +215,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigIntervalsMiddle = { val intervals = List("1:1-1", "2:1-2", "2:4-5", "3:2-2") - val chr1 = GenomeLocParser.parseGenomeInterval("1:1-1") - val chr2a = GenomeLocParser.parseGenomeInterval("2:1-2") - val chr2b = GenomeLocParser.parseGenomeInterval("2:4-5") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1 = genomeLocParser.parseGenomeInterval("1:1-1") + val chr2a = genomeLocParser.parseGenomeInterval("2:1-2") + val chr2b = genomeLocParser.parseGenomeInterval("2:4-5") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_middle." + index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(2, locs2.size) @@ -241,18 +241,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigIntervalsEnd = { val intervals = List("1:1-1", "2:2-2", "3:1-2", "3:4-5") - val chr1 = GenomeLocParser.parseGenomeInterval("1:1-1") - val chr2 = GenomeLocParser.parseGenomeInterval("2:2-2") - val chr3a = GenomeLocParser.parseGenomeInterval("3:1-2") - val chr3b = GenomeLocParser.parseGenomeInterval("3:4-5") + val chr1 = genomeLocParser.parseGenomeInterval("1:1-1") + val chr2 = genomeLocParser.parseGenomeInterval("2:2-2") + val chr3a = genomeLocParser.parseGenomeInterval("3:1-2") + val chr3b = genomeLocParser.parseGenomeInterval("3:4-5") val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_end." + index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size)