diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index ccc680347..7d3a001e3 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -101,7 +101,7 @@ public class GenomeAnalysisEngine { // Prepare the sort ordering w.r.t. the sequence dictionary if (argCollection.referenceFile != null) { final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile); - GenomeLoc.setupRefContigOrdering(refFile); + GenomeLocParser.setupRefContigOrdering(refFile); } // Determine the validation stringency. Default to ValidationStringency.STRICT. @@ -145,7 +145,7 @@ public class GenomeAnalysisEngine { // Prepare the sort ordering w.r.t. the sequence dictionary if (argCollection.referenceFile != null) { final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile); - GenomeLoc.setupRefContigOrdering(refFile); + GenomeLocParser.setupRefContigOrdering(refFile); } // Determine the validation stringency. Default to ValidationStringency.STRICT. @@ -222,10 +222,10 @@ public class GenomeAnalysisEngine { if ( intervalsString != null) { if (new File(intervalsString).exists()) { if (! quiet) logger.info("Intervals argument specifies a file. Loading intervals from file."); - locs = GenomeLoc.IntervalFileToList(intervalsString); + locs = GenomeLocParser.intervalFileToList(intervalsString); } else { if (! quiet) logger.info("Intervals argument does not specify a file. 
Trying to parse it as a simple string."); - locs = GenomeLoc.parseGenomeLocs(intervalsString); + locs = GenomeLocParser.parseGenomeLocs(intervalsString); } } return locs; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java index 3f18c1d4c..03611fdfb 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java @@ -5,6 +5,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; /** @@ -63,7 +64,7 @@ public abstract class LocusShardStrategy implements ShardStrategy { LocusShardStrategy( SAMSequenceDictionary dic ) { this.dic = dic; limitingFactor = -1; - mLoc = new GenomeLoc(0, 0, 0); + mLoc = GenomeLocParser.createGenomeLoc(0, 0, 0); if (dic.getSequences().size() > 0) { nextContig = true; } @@ -98,7 +99,7 @@ public abstract class LocusShardStrategy implements ShardStrategy { throw new IllegalArgumentException("Interval files must contain at least one interval"); } GenomeLoc loc = intervals.iterator().next(); - mLoc = new GenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1); + mLoc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1); if (dic.getSequences().size() > 0) { nextContig = true; } @@ -174,7 +175,7 @@ public abstract class LocusShardStrategy implements ShardStrategy { intervals.removeRegion(loc); return new IntervalShard(loc); } else { - GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1); + GenomeLoc subLoc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), 
loc.getStart() + proposedSize - 1); intervals.removeRegion(subLoc); return new IntervalShard(subLoc); } @@ -193,7 +194,7 @@ public abstract class LocusShardStrategy implements ShardStrategy { // can we fit it into the current seq size? if (nextStart + proposedSize - 1 < length) { lastGenomeLocSize = proposedSize; - mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1); + mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1); return LocusShard.toShard(mLoc); } // else we can't make it in the current location, we have to stitch one together @@ -207,7 +208,7 @@ public abstract class LocusShardStrategy implements ShardStrategy { // move to the next contig // the next sequence should start at the begining of the next contig - Shard ret = LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1)); + Shard ret = LocusShard.toShard(GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1)); // now jump ahead to the next contig jumpContig(); @@ -226,7 +227,7 @@ public abstract class LocusShardStrategy implements ShardStrategy { return; } logger.debug("Next contig, index = " + dic.getSequence(seqLoc).getSequenceIndex()); - mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0); + mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index bb278ba54..ea0d3e4e4 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -14,6 +14,7 @@ import 
org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.File; import java.util.ArrayList; @@ -184,7 +185,7 @@ public class SAMDataSource implements SimpleDataSource { if (!intoUnmappedReads) { if (lastReadPos == null) { - lastReadPos = new GenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE); + lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE); iter = iteratorPool.iterator(lastReadPos); return InitialReadIterator(shard.getSize(), iter); } else { @@ -280,7 +281,8 @@ public class SAMDataSource implements SimpleDataSource { ++x; } else { // jump contigs - if (lastReadPos.toNextContig() == false) { + lastReadPos = GenomeLocParser.toNextContig(lastReadPos); + if (lastReadPos == null) { // check to see if we're using unmapped reads, if not return, we're done readsTaken = 0; intoUnmappedReads = true; @@ -304,7 +306,7 @@ public class SAMDataSource implements SimpleDataSource { else if (rec != null) { int stopPos = rec.getAlignmentStart(); if (stopPos < lastReadPos.getStart()) { - lastReadPos = new GenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos); + lastReadPos = GenomeLocParser.createGenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos); } else { lastReadPos.setStart(rec.getAlignmentStart()); } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index ddf418fba..c64ed721b 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -18,9 +18,9 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; 
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.io.File; @@ -202,7 +202,7 @@ public abstract class MicroScheduler { catch (FileNotFoundException ex) { throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex); } - GenomeLoc.setupRefContigOrdering(ref); + GenomeLocParser.setupRefContigOrdering(ref); return ref; } } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java index bead63818..ec269a5db 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.NoSuchElementException; /** @@ -39,7 +39,7 @@ public class GenomeLocusIterator implements LocusIterator { */ public GenomeLocusIterator( GenomeLoc completeLocus ) { this.completeLocus = completeLocus; - this.currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart()); + this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart()); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusContextIteratorByHanger.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusContextIteratorByHanger.java index 0995e7691..1e232890a 100755 --- 
a/java/src/org/broadinstitute/sting/gatk/iterators/LocusContextIteratorByHanger.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusContextIteratorByHanger.java @@ -1,15 +1,16 @@ package org.broadinstitute.sting.gatk.iterators; -import net.sf.samtools.SAMRecord; import net.sf.samtools.AlignmentBlock; -import org.broadinstitute.sting.utils.*; +import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.RefHanger; +import org.broadinstitute.sting.utils.Utils; import java.util.Iterator; -import org.broadinstitute.sting.utils.RefHanger; -import org.broadinstitute.sting.gatk.LocusContext; -import org.apache.log4j.Logger; - /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis */ @@ -103,12 +104,12 @@ public class LocusContextIteratorByHanger extends LocusContextIterator { } protected void hangRead(final SAMRecord read) { - GenomeLoc readLoc = new GenomeLoc(read); + GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); for ( AlignmentBlock block : read.getAlignmentBlocks() ) { if ( DEBUG ) logger.debug(String.format("Processing block %s len=%d", block, block.getLength())); for ( int i = 0; i < block.getLength(); i++ ) { - GenomeLoc offset = new GenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i); + GenomeLoc offset = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i); readHanger.expandingPut(offset, read); offsetHanger.expandingPut(offset, block.getReadStart() + i - 1); if ( DEBUG ) logger.debug(String.format(" # Added %s", offset)); @@ -134,7 +135,7 @@ public class LocusContextIteratorByHanger extends LocusContextIterator { return true; else { final SAMRecord read = it.peek(); - GenomeLoc readLoc = new GenomeLoc(read); + GenomeLoc readLoc = 
GenomeLocParser.createGenomeLoc(read); final boolean coveredP = currentPositionIsFullyCovered(readLoc); //System.out.printf("CoverP = %s => %b%n", readLoc, coveredP); return coveredP; @@ -161,7 +162,7 @@ public class LocusContextIteratorByHanger extends LocusContextIterator { SAMRecord read = it.next(); justCleared = false; - GenomeLoc readLoc = new GenomeLoc(read); + GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); if ( DEBUG ) { logger.debug(String.format(" Expanding window sizes %d with %d : left=%s, right=%s, readLoc = %s, cmp=%d", readHanger.size(), incrementSize, diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java index 20fb3a8c7..58d40dfa1 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java @@ -5,6 +5,7 @@ import net.sf.samtools.util.RuntimeIOException; import net.sf.samtools.util.StringUtil; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import java.util.Iterator; @@ -65,7 +66,7 @@ public class ReferenceIterator implements Iterator { } public GenomeLoc getLocation() { - return new GenomeLoc(getCurrentContig().getName(), getPosition()); + return GenomeLocParser.createGenomeLoc(getCurrentContig().getName(), getPosition()); } // -------------------------------------------------------------------------------------------------------------- @@ -137,11 +138,11 @@ public class ReferenceIterator implements Iterator { if (seekOffset < offset ) { // bad boy -- can't go backward safely throw new IllegalArgumentException(String.format("Invalid seek %s => %s, which is usually due to out of order reads%n", - new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, 
seekOffset))); + GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset))); } else if (seekOffset >= currentContig.length()) { // bad boy -- can't go beyond the contig length throw new IllegalArgumentException(String.format("Invalid seek to %s, which is beyond the end of the contig%n", - new GenomeLoc(currentContig.getName(), seekOffset + 1))); + GenomeLocParser.createGenomeLoc(currentContig.getName(), seekOffset + 1))); } else { offset = seekOffset - 1; return next(); @@ -160,15 +161,16 @@ public class ReferenceIterator implements Iterator { if (DEBUG) logger.debug(String.format(" -> Seeking to %s %d from %s %d%n", seekContigName, seekOffset, currentContig.getName(), offset)); - int cmpContigs = GenomeLoc.compareContigs(seekContigName, currentContig.getName()); + + int cmpContigs = GenomeLocParser.compareContigs(seekContigName,currentContig.getName()); - if ( cmpContigs < 0 && GenomeLoc.hasKnownContigOrdering() ) { // if we know the order of contigs and we are already past the contig we seek, it's too late! + if ( cmpContigs < 0 && GenomeLocParser.hasKnownContigOrdering() ) { // if we know the order of contigs and we are already past the contig we seek, it's too late! // The contig we are looking for is before the currentContig -- it's an error throw new IllegalArgumentException(String.format("Invalid seek %s => %s, contigs/sequences are out of order%n", - new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset))); + GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset))); } - if ( cmpContigs > 0 || (! GenomeLoc.hasKnownContigOrdering() ) && cmpContigs != 0 ) { // if contig we seek is still ahead, or if we have no idea what the order is and current contig is not what we seek + if ( cmpContigs > 0 || (! 
GenomeLocParser.hasKnownContigOrdering() ) && cmpContigs != 0 ) { // if contig we seek is still ahead, or if we have no idea what the order is and current contig is not what we seek // then try to seek forward in the reference file until we get the contig we need if (DEBUG) logger.debug(String.format(" -> Seeking in the fasta file to %s from %s%n", seekContigName, currentContig.getName())); @@ -176,7 +178,7 @@ public class ReferenceIterator implements Iterator { if (!refFile.seekToContig(seekContigName)) { // ok, do the seek // a false result indicates a failure, throw a somewhat cryptic call throw new RuntimeIOException(String.format("Unexpected seek failure from %s to %s%n", - new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset))); + GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset))); } readNextContig(); // since we haven't failed, we just read in the next contig (which is seekContigName) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java index 2f96cd463..af143fa32 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.RuntimeIOException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.Reads; import java.util.Iterator; @@ -63,8 +64,8 @@ public class VerifyingSamIterator implements StingSAMIterator { if ( last == null || cur.getReadUnmappedFlag() ) return false; else { - GenomeLoc lastLoc = new GenomeLoc( last ); - GenomeLoc curLoc = new GenomeLoc( cur ); + GenomeLoc lastLoc = GenomeLocParser.createGenomeLoc( 
last ); + GenomeLoc curLoc = GenomeLocParser.createGenomeLoc( cur ); return curLoc.compareTo(lastLoc) == -1; } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/HapMapAlleleFrequenciesROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/HapMapAlleleFrequenciesROD.java index 510601fec..16c308c8f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/HapMapAlleleFrequenciesROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/HapMapAlleleFrequenciesROD.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.List; import java.util.Arrays; @@ -79,7 +80,7 @@ public class HapMapAlleleFrequenciesROD extends BasicReferenceOrderedDatum { varFreq = Double.parseDouble(parts[11]); // CEU_var_freq totalCounts = Integer.parseInt(parts[12]); // CEU_var - loc = GenomeLoc.parseGenomeLoc(contig, start, stop); + loc = GenomeLocParser.parseGenomeLoc(contig, start, stop); } catch ( RuntimeException e ) { System.out.printf(" Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts)); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java index 848a3c2d8..6da9736e1 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java @@ -1,19 +1,9 @@ package org.broadinstitute.sting.gatk.refdata; import java.util.*; -import java.util.regex.MatchResult; -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import 
org.broadinstitute.sting.utils.xReadLines; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom @@ -28,7 +18,7 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes { } public GenomeLoc getLocation() { - loc = new GenomeLoc(this.get("0"), Long.parseLong(this.get("1"))); + loc = GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1"))); return loc; } public String getRefBasesFWD() { return this.get("2"); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java b/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java index 350f3a6bf..877766e85 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java @@ -6,10 +6,7 @@ import java.util.*; import java.util.regex.Pattern; import java.util.regex.Matcher; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.Pileup; -import org.broadinstitute.sting.utils.xReadLines; +import org.broadinstitute.sting.utils.*; import net.sf.picard.reference.ReferenceSequenceFileWalker; @@ -97,8 +94,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup { if ( refBaseChar == '*' ) { parseIndels(observedString) ; - if ( varType == DELETION_VARIANT ) loc = new GenomeLoc(contig, start, start+eventLength-1); - else loc = new GenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!! 
+ if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.createGenomeLoc(contig, start, start+eventLength-1); + else loc = GenomeLocParser.createGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!! } else { parseBasesAndQuals(line,pos[7]+1,pos[8], pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) ); // parseBasesAndQuals(line.substring(pos[7]+1,pos[8]), line.substring(pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) ) ); @@ -108,7 +105,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup { refBases = line.substring(pos[1]+1, pos[2]).toUpperCase(); eventLength = 1; //loc = new GenomeLoc(contig, start, start+1); - loc = new GenomeLoc(contig, start, start); + loc = GenomeLocParser.createGenomeLoc(contig, start, start); char ch = observedString.charAt(0); @@ -170,8 +167,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup { if ( refBaseChar == '*' ) { parseIndels(parts[3]) ; - if ( varType == DELETION_VARIANT ) loc = GenomeLoc.parseGenomeLoc(contig, start, start+eventLength-1); - else loc = GenomeLoc.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!! + if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.parseGenomeLoc(contig, start, start+eventLength-1); + else loc = GenomeLocParser.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!! 
} else { parseBasesAndQuals(parts[8], parts[9]); @@ -181,7 +178,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup { refBases = parts[2].toUpperCase(); eventLength = 1; //loc = GenomeLoc.parseGenomeLoc(contig, start, start+1); - loc = GenomeLoc.parseGenomeLoc(contig, start, start); + loc = GenomeLocParser.parseGenomeLoc(contig, start, start); char ch = parts[3].charAt(0); @@ -623,7 +620,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup { System.exit(1); } - GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary()); int counter = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java index 70814d4b9..5fcba0356 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java @@ -1,17 +1,16 @@ package org.broadinstitute.sting.gatk.refdata; import java.util.*; -import java.util.regex.MatchResult; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.xReadLines; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.apache.log4j.Logger; /** @@ -201,7 +200,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map getHeader() { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java b/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java index dcb200b4b..f9435ea24 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java @@ -5,6 +5,7 @@ import 
java.util.List; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; /** Holds a single transcript annotation: refseq id, gene name, genomic locations of the locus, of the coding region * and of all the exons. @@ -75,8 +76,8 @@ public class Transcript { else throw new StingException("Expected strand symbol (+/-), found: "+fields[3]); String contig_name = fields[2]; - transcript_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])); - transcript_coding_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])); + transcript_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])); + transcript_coding_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])); gene_name = fields[12]; String[] exon_starts = fields[9].split(","); String[] exon_stops = fields[10].split(","); @@ -89,7 +90,7 @@ public class Transcript { exon_frames = new ArrayList(eframes.length); for ( int i = 0 ; i < exon_starts.length ; i++ ) { - exons.add(GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); + exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); exon_frames.add(Integer.decode(eframes[i])); } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index e1ab70119..6fc327af5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -4,10 +4,10 @@ import net.sf.picard.util.SequenceUtil; import java.util.*; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; 
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.MalformedGenomeLocException; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.refdata.AllelicVariant; /** @@ -162,7 +162,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria String contig = parts[1]; long start = Long.parseLong(parts[2]) + 1; // The final is 0 based long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based - loc = GenomeLoc.parseGenomeLoc(contig, start, stop-1); + loc = GenomeLocParser.parseGenomeLoc(contig, start, stop-1); name = parts[4]; refBases = parts[5]; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java index fb701b6fb..eb3f397f4 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.refdata; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; + import java.util.HashMap; -import java.util.Scanner; import java.util.Map; +import java.util.Scanner; import java.util.regex.MatchResult; import java.util.regex.Pattern; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; - /** * Class for representing arbitrary reference ordered data sets * @@ -73,7 +73,7 @@ public class rodGFF extends BasicReferenceOrderedDatum { } public GenomeLoc getLocation() { - return GenomeLoc.parseGenomeLoc(contig, start, stop); + return GenomeLocParser.parseGenomeLoc(contig, start, stop); } public String getAttribute(final String key) { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java 
b/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java index ba69d729b..f1cd9b5d8 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.xReadLines; +import org.broadinstitute.sting.utils.GenomeLocParser; public class rodRefSeq extends BasicReferenceOrderedDatum { @@ -178,8 +179,8 @@ class refSeqIterator implements Iterator { while ( reader.hasNext() ) { Transcript t = reader.peek(); - int ci1 = GenomeLoc.getContigIndex(curr_contig_name); - int ci2 = GenomeLoc.getContigIndex( t.getLocation().getContig() ); + int ci1 = GenomeLocParser.getContigIndex(curr_contig_name); + int ci2 = GenomeLocParser.getContigIndex( t.getLocation().getContig() ); if ( ci1 > ci2 ) throw new StingException("RefSeq track seems to be not contig-ordered"); if ( ci1 < ci2 ) break; // next transcript is on the next contig, we do not need it yet... if ( t.getLocation().getStart() > curr_position ) break; // next transcript is on the same contig but starts after the current position; we are done @@ -192,7 +193,7 @@ class refSeqIterator implements Iterator { // 'records' and current position are fully updated. 
We can now create new rod and return it (NOTE: this iterator will break if the list // of pre-loaded records is meddled with by the clients between iterations, so we return them as unmodifiable list) - rodRefSeq rod = new rodRefSeq(name,GenomeLoc.parseGenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records)); + rodRefSeq rod = new rodRefSeq(name, GenomeLocParser.parseGenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records)); // if ( (++z) % 1000000 == 0 ) { // System.out.println(rod.getLocation()+": holding "+records.size()+ "; time per 1M ref positions: "+((double)(System.currentTimeMillis()-t)/1000.0)+" s"); // z = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java index cc7adcb5c..917cec9be 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java @@ -7,6 +7,7 @@ import java.util.*; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; import net.sf.picard.reference.ReferenceSequenceFileWalker; @@ -192,7 +193,7 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype System.exit(1); } - GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary()); int counter = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java index 9240e1d85..8026b1041 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java @@ -1,6 +1,7 @@ package 
org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.IOException; @@ -22,7 +23,7 @@ public class rodVariants extends BasicReferenceOrderedDatum { public boolean parseLine(Object header, String[] parts) throws IOException { if (!parts[0].startsWith("#")) { - loc = new GenomeLoc(parts[0], Long.valueOf(parts[1])); + loc = GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1])); refBase = parts[2].charAt(0); depth = Integer.valueOf(parts[3]); maxMappingQuality = Integer.valueOf(parts[4]); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index dbe33dcb2..f9fde41ca 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -443,7 +443,7 @@ public abstract class TraversalEngine { //this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName); this.refFile = new FastaSequenceFile2(refFileName); // todo: replace when FastaSequenceFile2 is in picard this.refIter = new ReferenceIterator(this.refFile); - if (!GenomeLoc.setupRefContigOrdering(this.refFile)) { + if (!GenomeLocParser.setupRefContigOrdering(this.refFile)) { // We couldn't process the reference contig ordering, fail since we need it Utils.scareUser(String.format("We couldn't load the contig dictionary associated with %s. At the current time we require this dictionary file to efficiently access the FASTA file. 
Please use /seq/software/picard/current/bin/CreateSequenceDictionary.jar to create a sequence dictionary for your file", refFileName)); } @@ -470,7 +470,7 @@ public abstract class TraversalEngine { while (true) { ReferenceSequence ref = refFile.nextSequence(); logger.debug(String.format("%s %d %d", ref.getName(), ref.length(), System.currentTimeMillis())); - printProgress(true, "loci", new GenomeLoc("foo", 1)); + printProgress(true, "loci", GenomeLocParser.createGenomeLoc("foo", 1)); } } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java index f38c58eb8..8cb5417f3 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java @@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.iterators.ReferenceIterator; import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.*; import java.io.File; @@ -141,7 +142,7 @@ public class TraverseByLocusWindows extends TraversalEngine { walker.nonIntervalReadAction(read); } else { - GenomeLoc loc = new GenomeLoc(read); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(read); // if we're in the current interval, add it to the list if ( currentInterval.overlapsP(loc) ) { intervalReads.add(read); @@ -243,7 +244,7 @@ public class TraverseByLocusWindows extends TraversalEngine { } } - GenomeLoc window = new GenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex); + GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex); LocusContext locus = new LocusContext(window, reads, null); if ( DOWNSAMPLE_BY_COVERAGE ) locus.downsampleToCoverage(downsamplingCoverage); @@ -255,8 +256,8 
@@ public class TraverseByLocusWindows extends TraversalEngine { GenomeLoc loc = locus1.getLocation().merge(locus2.getLocation()); TreeSet set = new TreeSet(new Comparator() { public int compare(SAMRecord obj1, SAMRecord obj2) { - GenomeLoc myLoc = new GenomeLoc(obj1); - GenomeLoc hisLoc = new GenomeLoc(obj2); + GenomeLoc myLoc = GenomeLocParser.createGenomeLoc(obj1); + GenomeLoc hisLoc = GenomeLocParser.createGenomeLoc(obj2); int comparison = myLoc.compareTo(hisLoc); // if the reads have the same start position, we must give a non-zero comparison // (because java Sets often require "consistency with equals") diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 4360e70be..78b7bbbb5 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -1,25 +1,28 @@ package org.broadinstitute.sting.gatk.traversals; +import net.sf.picard.filter.FilteringIterator; +import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import 
org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Pair; import java.io.File; -import java.util.*; - -import net.sf.picard.filter.FilteringIterator; -import net.sf.picard.filter.SamRecordFilter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; /** * @@ -66,12 +69,12 @@ public class TraverseDuplicates extends TraversalEngine { private List readsAtLoc(final SAMRecord read, PushbackIterator iter) { - GenomeLoc site = new GenomeLoc(read); + GenomeLoc site = GenomeLocParser.createGenomeLoc(read); ArrayList l = new ArrayList(); l.add(read); for (SAMRecord read2: iter) { - GenomeLoc site2 = new GenomeLoc(read2); + GenomeLoc site2 = GenomeLocParser.createGenomeLoc(read2); // the next read starts too late if ( site2.getStart() != site.getStart() ) { @@ -105,12 +108,12 @@ public class TraverseDuplicates extends TraversalEngine { // At this point, there are two possibilities, we have found at least one dup or not // if it's a dup, add it to the dups list, otherwise add it to the uniques list if ( key != null ) { - final GenomeLoc keyLoc = new GenomeLoc(key); - final GenomeLoc keyMateLoc = new GenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart()); + final GenomeLoc keyLoc = GenomeLocParser.createGenomeLoc(key); + final GenomeLoc keyMateLoc = GenomeLocParser.createGenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart()); for ( SAMRecord read : reads ) { - final GenomeLoc readLoc = new GenomeLoc(read); - final GenomeLoc readMateLoc = new GenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); + final GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); + final GenomeLoc readMateLoc = GenomeLocParser.createGenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); if 
(DEBUG) logger.debug(String.format("Examining reads at %s vs. %s at %s / %s vs. %s / %s%n", key.getReadName(), read.getReadName(), keyLoc, keyMateLoc, readLoc, readMateLoc)); // read and key start at the same place, and either the this read and the key @@ -150,7 +153,7 @@ public class TraverseDuplicates extends TraversalEngine { PushbackIterator iter = new PushbackIterator(readIter); for (SAMRecord read: iter) { // get the genome loc from the read - GenomeLoc site = new GenomeLoc(read); + GenomeLoc site = GenomeLocParser.createGenomeLoc(read); List reads = readsAtLoc(read, iter); Pair, List> split = splitDuplicates(reads); List uniqueReads = split.getFirst(); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index bf8a543e8..b74ea7223 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -16,6 +16,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.File; import java.util.ArrayList; @@ -121,7 +122,7 @@ public class TraverseReads extends TraversalEngine { if (needsReferenceBasesP && read.getReferenceIndex() >= 0) { // get the genome loc from the read - GenomeLoc site = new GenomeLoc(read); + GenomeLoc site = GenomeLocParser.createGenomeLoc(read); // Jump forward in the reference to this locus location locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0)); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/PrepareROD.java b/java/src/org/broadinstitute/sting/playground/gatk/PrepareROD.java index ba5e69d91..a4d08ec42 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/PrepareROD.java +++ 
b/java/src/org/broadinstitute/sting/playground/gatk/PrepareROD.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.playground.gatk; -import net.sf.samtools.SAMSequenceRecord; import net.sf.picard.cmdline.CommandLineProgram; import net.sf.picard.cmdline.Usage; import net.sf.picard.cmdline.Option; @@ -11,8 +10,6 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.gatk.refdata.*; import java.io.*; -import java.util.HashMap; -import java.util.List; import java.util.ArrayList; public class PrepareROD extends CommandLineProgram { @@ -33,7 +30,7 @@ public class PrepareROD extends CommandLineProgram { // Prepare the sort ordering w.r.t. the sequence dictionary final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG); - GenomeLoc.setupRefContigOrdering(refFile); + GenomeLocParser.setupRefContigOrdering(refFile); Class rodClass = ReferenceOrderedData.Types.get(ROD_TYPE.toLowerCase()).type; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java index 791aeafce..86139a62d 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java @@ -1,24 +1,20 @@ package org.broadinstitute.sting.playground.gatk.walkers.indels; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; 
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.cmdLine.Argument; -import java.util.Map; -import java.util.HashMap; -import java.util.Queue; -import java.util.LinkedList; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** - * User: hanna + * User: hanna * Date: Jun 10, 2009 * Time: 2:40:19 PM * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT @@ -99,7 +95,7 @@ public class CleanedReadInjector extends ReadWalker { */ @Override public Integer map(char[] ref, SAMRecord read) { - GenomeLoc loc = new GenomeLoc(read); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(read); while( loc.isPast(interval) && intervals.size() > 0 ) { interval = intervals.remove(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java index 33efe5f73..31e32c804 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java @@ -18,10 +18,10 @@ import org.broadinstitute.sting.gatk.refdata.RODIterator; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.rodRefSeq; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.playground.indels.AlignmentUtils; import org.broadinstitute.sting.playground.utils.CircularArray; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; import 
org.broadinstitute.sting.utils.cmdLine.Argument; @@ -82,7 +82,7 @@ public class IndelGenotyperWalker extends ReadWalker { int nSams = getToolkit().getArguments().samFiles.size(); - location = new GenomeLoc(0,1); + location = GenomeLocParser.createGenomeLoc(0,1); if ( call_somatic ) { if ( nSams != 2 ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java index 4a4f6cc31..41a85d39f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import net.sf.samtools.SAMRecord; import net.sf.samtools.AlignmentBlock; @@ -35,8 +36,8 @@ public class IndelIntervalWalker extends ReadWalker reads.add(r5); reads.add(r6); reads.add(r7); - clean(reads, reference, new GenomeLoc(0,0)); + clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0)); } private void testCleanWithDeletion() { @@ -971,7 +971,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker reads.add(r6); reads.add(r7); reads.add(r8); - clean(reads, reference, new GenomeLoc(0,0)); + clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0)); } public static String cigarToString(Cigar cig) { diff --git a/java/src/org/broadinstitute/sting/playground/indels/IndelInspectorMain.java b/java/src/org/broadinstitute/sting/playground/indels/IndelInspectorMain.java index 48a81a5a4..9adae4cc6 100755 --- a/java/src/org/broadinstitute/sting/playground/indels/IndelInspectorMain.java +++ 
b/java/src/org/broadinstitute/sting/playground/indels/IndelInspectorMain.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.playground.indels; import java.io.File; -import java.util.List; import java.util.Map; import java.util.HashMap; @@ -16,6 +15,7 @@ import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.*; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; public class IndelInspectorMain extends CommandLineProgram { @@ -55,10 +55,10 @@ public class IndelInspectorMain extends CommandLineProgram { System.out.println("No reference sequence dictionary found. Abort."); } - GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary()); GenomeLoc location = null; if ( GENOME_LOCATION != null ) { - location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION); + location = GenomeLocParser.parseGenomeLoc(GENOME_LOCATION); } if ( COUNT_CUTOFF == null ) COUNT_CUTOFF = 2; @@ -117,8 +117,8 @@ public class IndelInspectorMain extends CommandLineProgram { cur_contig = r.getReferenceName(); System.out.println("Contig "+cur_contig); // if contig is specified and we are past that contig, we are done: - if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == 1 ) break; - if ( location == null || GenomeLoc.compareContigs(cur_contig, location.getContig()) == 0 ) { + if ( location != null && GenomeLocParser.compareContigs(cur_contig, location.getContig()) == 1 ) break; + if ( location == null || GenomeLocParser.compareContigs(cur_contig, location.getContig()) == 0 ) { if ( location != null ) System.out.println("Time spent to scroll input bam file to the specified chromosome: "+ ((System.currentTimeMillis()-tc)/1000) + " seconds."); tc = System.currentTimeMillis(); contig_seq = reference.get(r.getReferenceIndex()); @@ -130,7 +130,7 @@ public class IndelInspectorMain extends CommandLineProgram { } // if 
contig is specified and we did not reach it yet, skip the records until we reach that contig: - if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == -1 ) continue; + if ( location != null && GenomeLocParser.compareContigs(cur_contig, location.getContig()) == -1 ) continue; if ( location != null && r.getAlignmentEnd() < location.getStart() ) continue; @@ -327,7 +327,7 @@ public class IndelInspectorMain extends CommandLineProgram { setDefaultContigOrdering(); return; } - GenomeLoc.setupRefContigOrdering(h.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(h.getSequenceDictionary()); } private void setDefaultContigOrdering() { diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/java/src/org/broadinstitute/sting/utils/GenomeLoc.java index c90599a07..813f5b2a3 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLoc.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLoc.java @@ -1,17 +1,10 @@ package org.broadinstitute.sting.utils; -import edu.mit.broad.picard.util.Interval; -import edu.mit.broad.picard.directed.IntervalList; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceDictionary; -import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import java.io.File; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. 
@@ -26,253 +19,49 @@ import java.util.regex.Pattern; public class GenomeLoc implements Comparable, Cloneable { private static Logger logger = Logger.getLogger(GenomeLoc.class); + /** + * the basic components of a genome loc, its contig index, + * start and stop position, and (optionally) the contig name + */ private int contigIndex; private long start; private long stop; - - // -------------------------------------------------------------------------------------------------------------- - // - // Ugly global variable defining the optional ordering of contig elements - // - // -------------------------------------------------------------------------------------------------------------- - //public static Map refContigOrdering = null; - private static SAMSequenceDictionary contigInfo = null; - - public static boolean hasKnownContigOrdering() { - return contigInfo != null; - } - - - public static SAMSequenceRecord getContigInfo( final String contig ) { - return contigInfo.getSequence(contig); - } - - /** - * Returns the contig index of a specified string version of the contig - * @param contig the contig string - * @return the contig index, -1 if not found - */ - public static int getContigIndex( final String contig ) { - if (contigInfo.getSequenceIndex(contig) == -1) - Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig)); - - return contigInfo.getSequenceIndex(contig); - } - - public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) { - return setupRefContigOrdering(refFile.getSequenceDictionary()); - } - - public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) { - if (seqDict == null) { // we couldn't load the reference dictionary - logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs"); - Utils.scareUser("Failed to load reference dictionary"); - return false; - } else if ( contigInfo == null 
){ - contigInfo = seqDict; - logger.debug(String.format("Prepared reference sequence contig dictionary")); - for (SAMSequenceRecord contig : seqDict.getSequences() ) { - logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength())); - } - } - - return true; - } + private String contigName; + static int MAX_CONTIG; // -------------------------------------------------------------------------------------------------------------- // // constructors // // -------------------------------------------------------------------------------------------------------------- - public GenomeLoc( int contigIndex, final long start, final long stop ) { - if(contigInfo == null) { throw new StingException("Contig info has not been setup in the GenomeLoc context yet."); } - - if (!isSequenceIndexValid(contigIndex)) { - throw new StingException("Contig info has not been setup in the GenomeLoc context yet."); - } + /*GenomeLoc( int contigIndex, final long start, final long stop ) { + MAX_CONTIG = Integer.MAX_VALUE; if (start < 0) { throw new StingException("Bad start position " + start);} if (stop < -1) { throw new StingException("Bad stop position " + stop); } // a negative -1 indicates it's not a meaningful end position this.contigIndex = contigIndex; this.start = start; + this.contigName = null; // we just don't know this.stop = stop == -1 ? 
start : stop; + }*/ + + GenomeLoc(final SAMRecord read) { + this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()); } - public GenomeLoc(final SAMRecord read) { - this(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()); + GenomeLoc( final String contig, final int contigIndex, final long start, final long stop ) { + this.contigName = contig; + this.contigIndex = contigIndex; + this.start = start; + this.stop = stop; } - public GenomeLoc( final String contig, final long start, final long stop ) { - this(contigInfo.getSequenceIndex(contig), start, stop); - } - - public GenomeLoc( final String contig, final long pos ) { - this(contig, pos, pos); - } - - public GenomeLoc( final int contig, final long pos ) { + /*GenomeLoc( final int contig, final long pos ) { this(contig, pos, pos ); } - - public GenomeLoc( final GenomeLoc toCopy ) { - this( toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() ); - } - - // -------------------------------------------------------------------------------------------------------------- - // - // Parsing string representations - // - // -------------------------------------------------------------------------------------------------------------- - private static long parsePosition( final String pos ) { - String x = pos.replaceAll(",", ""); - return Long.parseLong(x); - } - - /** - * Use this static constructor when the input data is under limited control (i.e. parsing user data). - * @param contig Contig to parse. - * @param start Starting point. - * @param stop Stop point. - * @return The genome location, or a MalformedGenomeLocException if unparseable. 
- */ - public static GenomeLoc parseGenomeLoc( final String contig, long start, long stop ) { - if( !isContigValid(contig) ) - throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference."); - return new GenomeLoc(contig,start,stop); - } - - public static GenomeLoc parseGenomeLoc( final String str ) { - // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' - //System.out.printf("Parsing location '%s'%n", str); - - final Pattern regex1 = Pattern.compile("([\\w&&[^:]]+)$"); // matches case 1 - final Pattern regex2 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)$"); // matches case 2 - final Pattern regex3 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)-([\\d,]+)$"); // matches case 3 - final Pattern regex4 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)\\+"); // matches case 4 - - String contig = null; - long start = 1; - long stop = Integer.MAX_VALUE; - boolean bad = false; - - Matcher match1 = regex1.matcher(str); - Matcher match2 = regex2.matcher(str); - Matcher match3 = regex3.matcher(str); - Matcher match4 = regex4.matcher(str); - - try { - if ( match1.matches() ) { - contig = match1.group(1); - } - else if ( match2.matches() ) { - contig = match2.group(1); - start = parsePosition(match2.group(2)); - stop = start; - } - else if ( match4.matches() ) { - contig = match4.group(1); - start = parsePosition(match4.group(2)); - } - else if ( match3.matches() ) { - contig = match3.group(1); - start = parsePosition(match3.group(2)); - stop = parsePosition(match3.group(3)); - - if ( start > stop ) - bad = true; - } - else { - bad = true; - } - } catch ( Exception e ) { - bad = true; - } - - if ( bad ) { - throw new StingException("Invalid Genome Location string: " + str); - } - - if ( stop == Integer.MAX_VALUE && hasKnownContigOrdering() ) { - // lookup the actually stop position! 
- stop = getContigInfo(contig).getSequenceLength(); - } - - if( !isContigValid(contig) ) - throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference."); - - GenomeLoc loc = parseGenomeLoc(contig,start,stop); - // System.out.printf(" => Parsed location '%s' into %s%n", str, loc); - - return loc; - } - - /** - * Useful utility function that parses a location string into a coordinate-order sorted - * array of GenomeLoc objects - * - * @param str String representation of genome locs. Null string corresponds to no filter. - * @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order - */ - public static List parseGenomeLocs(final String str) { - // Null string means no filter. - if( str == null ) return null; - - // Of the form: loc1;loc2;... - // Where each locN can be: - // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' - try { - List locs = new ArrayList(); - for( String loc: str.split(";") ) - locs.add( parseGenomeLoc(loc.trim()) ); - Collections.sort(locs); - //logger.info(String.format("Going to process %d locations", locs.length)); - locs = mergeOverlappingLocations(locs); - logger.debug("Locations are:" + Utils.join(", ", locs)); - return locs; - } catch (Exception e) { - e.printStackTrace(); - Utils.scareUser(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str)); - return null; - } - } - - public static List mergeOverlappingLocations(final List raw) { - logger.debug(" Raw locations are:\n" + Utils.join("\n", raw)); - if ( raw.size() <= 1 ) - return raw; - else { - ArrayList merged = new ArrayList(); - Iterator it = raw.iterator(); - GenomeLoc prev = it.next(); - while ( it.hasNext() ) { - GenomeLoc curr = it.next(); - if ( prev.contiguousP(curr) ) { - prev = prev.merge(curr); - } else { - merged.add(prev); - prev = curr; - } 
- } - merged.add(prev); - return merged; - } - } - - - /** - * Move this Genome loc to the next contig, with a start - * and stop of 1. - * @return true if we are not out of contigs, otherwise false if we're - * at the end of the genome (no more contigs to jump to). - */ - public boolean toNextContig() { - if ((contigIndex + 1) < GenomeLoc.contigInfo.size()) { - this.contigIndex++; - this.start = 1; - this.stop = 1; - return true; - } - return false; + */ + GenomeLoc( final GenomeLoc toCopy ) { + this( toCopy.getContig(), toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() ); } @@ -319,7 +108,7 @@ public class GenomeLoc implements Comparable, Cloneable { return returnTrueIfEmpty; // skip loci before intervals begin - if ( hasKnownContigOrdering() && curr.contigIndex < locs.get(0).contigIndex ) + if ( curr.contigIndex < locs.get(0).contigIndex ) return false; for ( GenomeLoc loc : locs ) { @@ -336,23 +125,7 @@ public class GenomeLoc implements Comparable, Cloneable { // Accessors and setters // public final String getContig() { - //this.contigIndex != -1; - if (!(contigInfo != null && contigInfo.getSequences() != null)) { - throw new StingException("The contig information or it's sequences are null"); - } - if ((this.contigIndex < 0) || (this.contigIndex >= contigInfo.getSequences().size())) { - throw new StingException("The contig index is not bounded by the zero and seqeunce count, contig index: " + contigIndex); - } - if (contigInfo.getSequence(this.contigIndex) == null || - contigInfo.getSequence(this.contigIndex).getSequenceName() == null) { - throw new StingException("The associated sequence index for contig " + contigIndex + " is null"); - } - return contigInfo.getSequence(this.contigIndex).getSequenceName(); - //if (contigInfo != null && contigInfo.getSequence(this.contigIndex) != null) { - // return contigInfo.getSequence(this.contigIndex).getSequenceName(); - //} - - //return null; + return this.contigName; } public final int getContigIndex() { return 
this.contigIndex; } @@ -372,7 +145,7 @@ public class GenomeLoc implements Comparable, Cloneable { public final boolean atBeginningOfContigP() { return this.start == 1; } public void setContig(String contig) { - this.contigIndex = contigInfo.getSequenceIndex(contig); + this.contigName = contig; } public void setStart(long start) { @@ -411,7 +184,7 @@ public class GenomeLoc implements Comparable, Cloneable { throw new StingException("The two genome loc's need to be contigous"); } - return new GenomeLoc(getContig(), + return new GenomeLoc(getContig(), this.contigIndex, Math.min(getStart(), that.getStart()), Math.max( getStop(), that.getStop()) ); } @@ -497,54 +270,11 @@ public class GenomeLoc implements Comparable, Cloneable { return new GenomeLoc(this); } - // - // Comparison operations - // - // TODO: get rid of this method because it's sloooooooooooooow - @Deprecated - public static int compareContigs( final String thisContig, final String thatContig ) - { - if ( thisContig == thatContig ) - { - // Optimization. If the pointers are equal, then the contigs are equal. 
- return 0; - } - - if ( hasKnownContigOrdering() ) - { - int thisIndex = getContigIndex(thisContig); - int thatIndex = getContigIndex(thatContig); - - if ( thisIndex == -1 ) - { - if ( thatIndex == -1 ) - { - // Use regular sorted order - return thisContig.compareTo(thatContig); - } - else - { - // this is always bigger if that is in the key set - return 1; - } - } - else if ( thatIndex == -1 ) - { - return -1; - } - else - { - if ( thisIndex < thatIndex ) return -1; - if ( thisIndex > thatIndex ) return 1; - return 0; - } - } - else - { - return thisContig.compareTo(thatContig); - } - } - + /** + * conpare this genomeLoc's contig to another genome loc + * @param that + * @return + */ public final int compareContigs( GenomeLoc that ) { if (this.contigIndex == that.contigIndex) return 0; @@ -570,64 +300,7 @@ public class GenomeLoc implements Comparable, Cloneable { } - /** - * Read a file of genome locations to process. - * regions specified by the location string. The string is of the form: - * Of the form: loc1;loc2;... - * Where each locN can be: - * 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' - * - * @param file_name - */ - public static List IntervalFileToList(final String file_name) { -// first try to read it as an interval file since that's well structured - // we'll fail quickly if it's not a valid file. 
Then try to parse it as - // a location string file - List ret = null; - try { - IntervalList il = IntervalList.fromFile(new File(file_name)); - // iterate through the list of merged intervals and add then as GenomeLocs - ret = new ArrayList(); - for(Interval interval : il.getUniqueIntervals()) { - ret.add(new GenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd())); - } - return ret; - } catch (Exception e) { - try { - xReadLines reader = new xReadLines(new File(file_name)); - List lines = reader.readLines(); - reader.close(); - String locStr = Utils.join(";", lines); - logger.debug("locStr: " + locStr); - ret = parseGenomeLocs(locStr); - return ret; - } catch (Exception e2) { - logger.error("Attempt to parse interval file in GATK format failed: "+e2.getMessage()); - e2.printStackTrace(); - throw new StingException("Unable to parse out interval file in either format", e); - } - } - } - /** - * Determines whether the given contig is valid with respect to the sequence dictionary - * already installed in the GenomeLoc. - * @return True if the contig is valid. False otherwise. - */ - private static boolean isContigValid( String contig ) { - int contigIndex = contigInfo.getSequenceIndex(contig); - return isSequenceIndexValid(contigIndex); - } - - /** - * Determines whether the given sequence index is valid with respect to the sequence dictionary. - * @param sequenceIndex sequence index - * @return True if the sequence index is valid, false otherwise. 
- */ - private static boolean isSequenceIndexValid( int sequenceIndex ) { - return sequenceIndex >= 0 && sequenceIndex < contigInfo.size(); - - } } diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java new file mode 100644 index 000000000..36026442c --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -0,0 +1,484 @@ +package org.broadinstitute.sting.utils; + +import edu.mit.broad.picard.directed.IntervalList; +import edu.mit.broad.picard.util.Interval; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; +import org.apache.log4j.Logger; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Created by IntelliJ IDEA. + * User: aaronmckenna + * Date: Jun 18, 2009 + * Time: 11:17:01 PM + * To change this template use File | Settings | File Templates. + */ +public class GenomeLocParser { + private static Logger logger = Logger.getLogger(GenomeLocParser.class); + + + // -------------------------------------------------------------------------------------------------------------- + // + // Ugly global variable defining the optional ordering of contig elements + // + // -------------------------------------------------------------------------------------------------------------- + //public static Map refContigOrdering = null; + private static SAMSequenceDictionary contigInfo = null; + + /** + * do we have a contig ordering setup? 
+ * + * @return true if the contig order is setup + */ + public static boolean hasKnownContigOrdering() { + return contigInfo != null; + } + + /** + * get the contig's SAMSequenceRecord + * + * @param contig the string name of the contig + * + * @return the sam sequence record + */ + public static SAMSequenceRecord getContigInfo(final String contig) { + return contigInfo.getSequence(contig); + } + + /** + * Returns the contig index of a specified string version of the contig + * + * @param contig the contig string + * + * @return the contig index, -1 if not found + */ + public static int getContigIndex(final String contig) { + if (contigInfo.getSequenceIndex(contig) == -1) + Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig)); + + return contigInfo.getSequenceIndex(contig); + } + + /** + * set our internal reference contig order + * + * @param refFile the reference file + * + * @return true if we were successful + */ + public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) { + return setupRefContigOrdering(refFile.getSequenceDictionary()); + } + + /** + * setup our internal reference contig order + * + * @param seqDict the sequence dictionary + * + * @return true if we were successful + */ + public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) { + if (seqDict == null) { // we couldn't load the reference dictionary + logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs"); + Utils.scareUser("Failed to load reference dictionary"); + return false; + } else if (contigInfo == null) { + contigInfo = seqDict; + logger.debug(String.format("Prepared reference sequence contig dictionary")); + for (SAMSequenceRecord contig : seqDict.getSequences()) { + logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength())); + } + } + GenomeLoc.MAX_CONTIG = 
contigInfo.getSequences().size(); + return true; + } + + /** + * parse a genome location, from a location string + * + * @param str the string to parse + * + * @return a GenomeLoc representing the String + */ + public static GenomeLoc parseGenomeLoc(final String str) { + // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' + //System.out.printf("Parsing location '%s'%n", str); + + final Pattern regex1 = Pattern.compile("([\\w&&[^:]]+)$"); // matches case 1 + final Pattern regex2 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)$"); // matches case 2 + final Pattern regex3 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)-([\\d,]+)$"); // matches case 3 + final Pattern regex4 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)\\+"); // matches case 4 + + String contig = null; + long start = 1; + long stop = Integer.MAX_VALUE; + boolean bad = false; + + Matcher match1 = regex1.matcher(str); + Matcher match2 = regex2.matcher(str); + Matcher match3 = regex3.matcher(str); + Matcher match4 = regex4.matcher(str); + + try { + if (match1.matches()) { + contig = match1.group(1); + } else if (match2.matches()) { + contig = match2.group(1); + start = parsePosition(match2.group(2)); + stop = start; + } else if (match4.matches()) { + contig = match4.group(1); + start = parsePosition(match4.group(2)); + } else if (match3.matches()) { + contig = match3.group(1); + start = parsePosition(match3.group(2)); + stop = parsePosition(match3.group(3)); + + if (start > stop) + bad = true; + } else { + bad = true; + } + } catch (Exception e) { + bad = true; + } + + if (bad) { + throw new StingException("Invalid Genome Location string: " + str); + } + + if (stop == Integer.MAX_VALUE && hasKnownContigOrdering()) { + // lookup the actually stop position! 
+ stop = getContigInfo(contig).getSequenceLength(); + } + + if (!isContigValid(contig)) + throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference."); + + GenomeLoc loc = parseGenomeLoc(contig, start, stop); + // System.out.printf(" => Parsed location '%s' into %s%n", str, loc); + + return loc; + } + + /** + * Useful utility function that parses a location string into a coordinate-order sorted + * array of GenomeLoc objects + * + * @param str String representation of genome locs. Null string corresponds to no filter. + * + * @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order + */ + public static List parseGenomeLocs(final String str) { + // Null string means no filter. + if (str == null) return null; + + // Of the form: loc1;loc2;... + // Where each locN can be: + // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' + try { + List locs = new ArrayList(); + for (String loc : str.split(";")) + locs.add(parseGenomeLoc(loc.trim())); + Collections.sort(locs); + //logger.info(String.format("Going to process %d locations", locs.length)); + locs = mergeOverlappingLocations(locs); + logger.debug("Locations are:" + Utils.join(", ", locs)); + return locs; + } catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out + throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str),e); + } + } + + // -------------------------------------------------------------------------------------------------------------- + // + // Parsing string representations + // + // -------------------------------------------------------------------------------------------------------------- + private static long parsePosition(final String pos) { + String x = pos.replaceAll(",", ""); + 
return Long.parseLong(x); + } + + + /** + * merge a list of genome locs that may be overlapping, returning the list of unique genomic locations + * + * @param raw the unchecked genome loc list + * + * @return the list of merged locations + */ + public static List mergeOverlappingLocations(final List raw) { + logger.debug(" Raw locations are:\n" + Utils.join("\n", raw)); + if (raw.size() <= 1) + return raw; + else { + ArrayList merged = new ArrayList(); + Iterator it = raw.iterator(); + GenomeLoc prev = it.next(); + while (it.hasNext()) { + GenomeLoc curr = it.next(); + if (prev.contiguousP(curr)) { + prev = prev.merge(curr); + } else { + merged.add(prev); + prev = curr; + } + } + merged.add(prev); + return merged; + } + } + + /** + * Determines whether the given contig is valid with respect to the sequence dictionary + * already installed in the GenomeLoc. + * + * @return True if the contig is valid. False otherwise. + */ + private static boolean isContigValid(String contig) { + int contigIndex = contigInfo.getSequenceIndex(contig); + return isSequenceIndexValid(contigIndex); + } + + /** + * Determines whether the given sequence index is valid with respect to the sequence dictionary. + * + * @param sequenceIndex sequence index + * + * @return True if the sequence index is valid, false otherwise. + */ + private static boolean isSequenceIndexValid(int sequenceIndex) { + return sequenceIndex >= 0 && sequenceIndex < contigInfo.size(); + + } + + /** + * Use this static constructor when the input data is under limited control (i.e. parsing user data). + * + * @param contig Contig to parse. + * @param start Starting point. + * @param stop Stop point. + * + * @return The genome location, or a MalformedGenomeLocException if unparseable. 
+ */ + public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) { + if (!isContigValid(contig)) + throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference."); + return new GenomeLoc(contig, getContigIndex(contig), start, stop); + } + + + /** + * Read a file of genome locations to process. The file is first read as an interval list; + * if that fails, its lines are joined into a location string. The string is of the form: + * loc1;loc2;... + * Where each locN can be: + * 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' + * + * @param file_name path of the interval file to read + */ + public static List intervalFileToList(final String file_name) { + /** + * first try to read it as an interval file since that's well structured + * we'll fail quickly if it's not a valid file. Then try to parse it as + * a location string file + */ + List ret = null; + try { + IntervalList il = IntervalList.fromFile(new File(file_name)); + + // iterate through the list of merged intervals and add them as GenomeLocs + ret = new ArrayList(); + for (Interval interval : il.getUniqueIntervals()) { + ret.add(new GenomeLoc(interval.getSequence(), getContigIndex(interval.getSequence()), interval.getStart(), interval.getEnd())); + } + return ret; + + } catch (Exception e) { + try { + xReadLines reader = new xReadLines(new File(file_name)); + List lines = reader.readLines(); + reader.close(); + String locStr = Utils.join(";", lines); + logger.debug("locStr: " + locStr); + ret = parseGenomeLocs(locStr); + return ret; + } catch (Exception e2) { + logger.error("Attempt to parse interval file in GATK format failed: " + e2.getMessage()); + e2.printStackTrace(); + throw new StingException("Unable to parse out interval file in either format", e); + } + } + } + + /** + * get the sequence name from a sequence index + * + * @param contigIndex the contig index + * + * @return the string that represents that contig name + */ + private static String
getSequenceNameFromIndex(int contigIndex) { + return GenomeLocParser.contigInfo.getSequence(contigIndex).getSequenceName(); + } + + /** + * create a genome loc, given the contig name, start, and stop + * + * @param contig the contig name + * @param start the starting position + * @param stop the stop position + * + * @return a new genome loc + */ + public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) { + checkSetup(); + return verifyGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig), start, stop)); + } + + /** + * create a genome loc, given the contig index, start, and stop + * + * @param contigIndex the contig index + * @param start the start position + * @param stop the stop position + * + * @return a new genome loc + */ + public static GenomeLoc createGenomeLoc(int contigIndex, final long start, final long stop) { + checkSetup(); + if (start < 0) { + throw new StingException("Bad start position " + start); + } + if (stop < -1) { + throw new StingException("Bad stop position " + stop); + } // a negative -1 indicates it's not a meaningful end position + + + return new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop); + } + + /** + * create a genome loc, given a read + * + * @param read the read whose reference name, reference index, and alignment start/end are used + * + * @return a new genome loc built from the read's alignment start and end + */ + public static GenomeLoc createGenomeLoc(final SAMRecord read) { + checkSetup(); + return verifyGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd())); + } + + + /** + * create a new genome loc, given the contig index, and a single position + * + * @param contig the contig index + * @param pos the position + * + * @return a genome loc representing a single base at the specified position on the contig + */ + public static GenomeLoc createGenomeLoc(final int contig, final long pos) { + checkSetup(); + return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos)); + } + + /** + *
create a new genome loc, given the contig name, and a single position + * + * @param contig the contig name + * @param pos the position + * + * @return a genome loc representing a single base at the specified position on the contig + */ + public static GenomeLoc createGenomeLoc(final String contig, final long pos) { + checkSetup(); + return verifyGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig), pos, pos)); + } + + public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) { + checkSetup(); + return verifyGenomeLoc(new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop())); + } + + /** + * verify the specified genome loc is valid, if it's not, throw an exception + * + * @param toReturn the genome loc we're about to return + * + * @return the genome loc if it's valid, otherwise we throw an exception + */ + private static GenomeLoc verifyGenomeLoc(GenomeLoc toReturn) { + // conditions to fail on - we currently use a start of zero to indicate infinite read count, so don't check for that + //if ((toReturn.getStop() < toReturn.getStart())) { + // throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is after the stop (Start = " + toReturn.getStart() + " stop = " + toReturn.getStop() + ")"); + //} + if (toReturn.getStart() < 0) { + throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0"); + } + if (toReturn.getStop() < 0) { + throw new StingException("Parameters to GenomeLocParser are incorrect: the stop position is less than 0"); + } + if (toReturn.getContigIndex() < 0) { + throw new StingException("Parameters to GenomeLocParser are incorrect: the contig index is less than 0"); + } + if (toReturn.getContigIndex() >= contigInfo.getSequences().size()) { + throw new StingException("Parameters to GenomeLocParser are incorrect: the contig index is greater then the stored sequence count"); + + } + return toReturn; + + } + + + /**
+ * Move this Genome loc to the next contig, with a start + * and stop of 1. + * + * @return true if we are not out of contigs, otherwise false if we're + * at the end of the genome (no more contigs to jump to). + */ + public static GenomeLoc toNextContig(GenomeLoc current) { + if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) { + return null; + } else + return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1)); + } + + /** + * check to make sure that we've setup the contig information + */ + private static void checkSetup() { + if (contigInfo == null) { + throw new StingException("The GenomeLocParser hasn't been setup with a contig sequence yet"); + } + } + + /** + * compare two contig names, in the current context + * + * @param firstContig + * @param secondContig + * + * @return + */ + public static int compareContigs(String firstContig, String secondContig) { + checkSetup(); + Integer ref1 = GenomeLocParser.getContigIndex(firstContig); + Integer ref2 = GenomeLocParser.getContigIndex(secondContig); + return ref1.compareTo(ref2); + + } +} diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java index 6e57f6a49..53f62b95d 100755 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java @@ -168,8 +168,8 @@ public class GenomeLocSortedSet extends AbstractSet { * |------| + |--------| * */ - GenomeLoc before = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); - GenomeLoc after = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); + GenomeLoc before = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); + GenomeLoc after = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); int index = mArray.indexOf(g); if 
(after.getStop() - after.getStart() > 0) { mArray.add(index, after); @@ -207,9 +207,9 @@ public class GenomeLocSortedSet extends AbstractSet { */ if (e.getStart() < g.getStart()) { - l = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); + l = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); } else { - l = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); + l = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); } // replace g with the new region mArray.set(mArray.indexOf(g), l); @@ -230,7 +230,7 @@ public class GenomeLocSortedSet extends AbstractSet { public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) { GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(); for (SAMSequenceRecord record : dict.getSequences()) { - returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength())); + returnSortedSet.add(GenomeLocParser.createGenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength())); } return returnSortedSet; } @@ -258,7 +258,7 @@ public class GenomeLocSortedSet extends AbstractSet { GenomeLocSortedSet ret = new GenomeLocSortedSet(); for (GenomeLoc loc : this.mArray) { // ensure a deep copy - ret.mArray.add(new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop())); + ret.mArray.add(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop())); } return ret; } diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java index 3b293a0fc..f84955040 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java @@ -7,8 +7,6 @@ import net.sf.samtools.util.CloseableIterator; import java.io.InputStream; import java.io.ByteArrayInputStream; import 
java.io.UnsupportedEncodingException; -import java.io.File; -import java.util.Collections; import java.util.List; import java.util.Arrays; import java.util.Iterator; @@ -16,8 +14,7 @@ import java.util.ArrayList; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * User: hanna * Date: Jun 11, 2009 @@ -54,11 +51,11 @@ public class ArtificialSAMFileReader extends SAMFileReader { */ @Override public CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) { - GenomeLoc region = new GenomeLoc(sequence, start, end); + GenomeLoc region = GenomeLocParser.createGenomeLoc(sequence, start, end); List coveredSubset = new ArrayList(); for( SAMRecord read: reads ) { - GenomeLoc readPosition = new GenomeLoc(read); + GenomeLoc readPosition = GenomeLocParser.createGenomeLoc(read); if( contained && region.containsP(readPosition) ) coveredSubset.add(read); else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java index 56c630153..394ce9c3b 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.junit.Assert; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.LocusContext; import net.sf.samtools.SAMRecord; @@ -40,13 +41,13 @@ public class AllLocusViewTest extends LocusViewTemplate { AllLocusView allLocusView = 
(AllLocusView)view; for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { - GenomeLoc site = new GenomeLoc("chr1",i); + GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); LocusContext locusContext = allLocusView.next(); Assert.assertEquals("Locus context location is incorrect", site, locusContext.getLocation() ); int expectedReadsAtSite = 0; for( SAMRecord read: reads ) { - if(new GenomeLoc(read).containsP(locusContext.getLocation())) { + if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) { Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) ); expectedReadsAtSite++; } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java index 3b3fcb525..d5fb19c53 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.junit.Assert; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.LocusContext; import net.sf.samtools.SAMRecord; @@ -43,11 +44,11 @@ public class CoveredLocusViewTest extends LocusViewTemplate { CoveredLocusView coveredLocusView = (CoveredLocusView)view; for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { - GenomeLoc site = new GenomeLoc("chr1",i); + GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); int expectedReadsAtSite = 0; for( SAMRecord read: reads ) { - if( new GenomeLoc(read).containsP(site) ) + if( GenomeLocParser.createGenomeLoc(read).containsP(site) ) expectedReadsAtSite++; } @@ -61,7 +62,7 @@ public class CoveredLocusViewTest extends LocusViewTemplate { Assert.assertEquals("Found wrong 
number of reads at site", expectedReadsAtSite, locusContext.getReads().size()); for( SAMRecord read: reads ) { - if(new GenomeLoc(read).containsP(locusContext.getLocation())) + if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) ); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java index 48563d5f8..cdf4b4feb 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.junit.Test; import org.junit.Assert; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.LocusShard; import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; @@ -32,7 +33,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { */ @Test(expected=InvalidPositionException.class) public void testSingleBPFailure() { - Shard shard = new LocusShard( new GenomeLoc(0,1,50) ); + Shard shard = new LocusShard( GenomeLocParser.createGenomeLoc(0,1,50) ); ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null); LocusReferenceView view = new LocusReferenceView(dataProvider); @@ -45,12 +46,12 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { */ @Test(expected=InvalidPositionException.class) public void testBoundsFailure() { - Shard shard = new LocusShard( new GenomeLoc(0,1,50) ); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0,1,50) ); ShardDataProvider 
dataProvider = new ShardDataProvider(shard,null,sequenceFile,null); LocusReferenceView view = new LocusReferenceView(dataProvider); - view.getReferenceBase(new GenomeLoc(0,51)); + view.getReferenceBase(GenomeLocParser.createGenomeLoc(0,51)); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index d173ad899..e097e2eb8 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -1,31 +1,21 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.samtools.*; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.datasources.shards.LocusShard; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.junit.BeforeClass; import org.junit.Test; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.LocusShard; -import org.broadinstitute.sting.gatk.Reads; -import java.io.FileNotFoundException; import java.io.File; -import java.util.Collections; -import java.util.List; -import java.util.Iterator; -import java.util.ArrayList; -import java.util.Arrays; - -import net.sf.picard.reference.ReferenceSequenceFile; -import net.sf.picard.reference.ReferenceSequence; -import net.sf.samtools.SAMSequenceDictionary; 
-import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; +import java.io.FileNotFoundException; +import java.util.*; /** * User: hanna * Date: May 13, 2009 @@ -49,14 +39,14 @@ public abstract class LocusViewTemplate extends BaseTest { @BeforeClass public static void setupGenomeLoc() throws FileNotFoundException { sequenceSourceFile = fakeReferenceSequenceFile(); - GenomeLoc.setupRefContigOrdering(sequenceSourceFile); + GenomeLocParser.setupRefContigOrdering(sequenceSourceFile); } @Test public void emptyLocusContextTest() { SAMRecordIterator iterator = new SAMRecordIterator(); - GenomeLoc shardBounds = new GenomeLoc("chr1",1,5); + GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5); Shard shard = new LocusShard(shardBounds); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); @@ -70,7 +60,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1",1,5); SAMRecordIterator iterator = new SAMRecordIterator(read); - GenomeLoc shardBounds = new GenomeLoc("chr1",1,5); + GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5); Shard shard = new LocusShard(shardBounds); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); @@ -84,7 +74,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1",1,5); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -96,7 +86,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1",6,10); SAMRecordIterator 
iterator = new SAMRecordIterator(read); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -108,7 +98,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1",3,7); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -120,7 +110,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1",1,10); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new LocusShard(new GenomeLoc("chr1",6,15)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -132,7 +122,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1",6,15); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -145,7 +135,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read2 = buildSAMRecord("chr1",6,10); SAMRecordIterator iterator = new SAMRecordIterator(read1,read2); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new 
ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -162,7 +152,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read4 = buildSAMRecord("chr1",6,10); SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -179,7 +169,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read4 = buildSAMRecord("chr1",5,9); SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -198,7 +188,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read6 = buildSAMRecord("chr1",6,10); SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4,read5,read6); - Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); @@ -224,7 +214,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read01,read02,read03,read04,read05,read06, read07,read08,read09,read10,read11,read12); - Shard shard = new LocusShard(new GenomeLoc("chr1",6,15)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); LocusView view = createView( dataProvider ); diff --git 
a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java index a0a01ce9e..0157bd3d9 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java @@ -4,7 +4,7 @@ import org.junit.Test; import org.junit.BeforeClass; import org.junit.Assert; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.TabularROD; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -44,7 +44,7 @@ public class ReferenceOrderedViewTest extends BaseTest { public static void init() throws FileNotFoundException { // sequence seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - GenomeLoc.setupRefContigOrdering(seq); + GenomeLocParser.setupRefContigOrdering(seq); } /** @@ -52,11 +52,11 @@ public class ReferenceOrderedViewTest extends BaseTest { */ @Test public void testNoBindings() { - Shard shard = new LocusShard(new GenomeLoc("chrM",1,30)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30)); ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.emptyList()); ReferenceOrderedView view = new ReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",10)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10)); Assert.assertNull("The tracker should not have produced any data", tracker.lookup("tableTest",null)); } @@ -69,12 +69,12 @@ public 
class ReferenceOrderedViewTest extends BaseTest { ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class); ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(rod); - Shard shard = new LocusShard(new GenomeLoc("chrM",1,30)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30)); ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.singletonList(dataSource)); ReferenceOrderedView view = new ReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); TabularROD datum = (TabularROD)tracker.lookup("tableTest",null); Assert.assertEquals("datum parameter for COL1 is incorrect", "C", datum.get("COL1")); @@ -95,12 +95,12 @@ public class ReferenceOrderedViewTest extends BaseTest { ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(rod2); - Shard shard = new LocusShard(new GenomeLoc("chrM",1,30)); + Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30)); ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Arrays.asList(dataSource1,dataSource2)); ReferenceOrderedView view = new ReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); TabularROD datum1 = (TabularROD)tracker.lookup("tableTest1",null); Assert.assertEquals("datum1 parameter for COL1 is incorrect", "C", datum1.get("COL1")); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java index dbf2050a3..bf7bcdb9b 100755 --- 
a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLoc; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.junit.BeforeClass; import org.junit.Test; -import java.io.FileNotFoundException; import java.io.File; - -import net.sf.samtools.SAMSequenceRecord; +import java.io.FileNotFoundException; /** * User: hanna * Date: May 27, 2009 @@ -43,7 +43,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { @BeforeClass public static void initialize() throws FileNotFoundException { sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") ); - GenomeLoc.setupRefContigOrdering(sequenceFile); + GenomeLocParser.setupRefContigOrdering(sequenceFile); } /** @@ -51,7 +51,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { */ @Test public void testReferenceStart() { - validateLocation( new GenomeLoc(0,1,25) ); + validateLocation( GenomeLocParser.createGenomeLoc(0,1,25) ); } /** @@ -63,7 +63,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1); final long contigStart = selectedContig.getSequenceLength() - 24; final long contigStop = selectedContig.getSequenceLength(); - validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + 
validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); } /** @@ -74,7 +74,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { // Test the last 25 bases of the first contig. int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); - validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),1,25) ); + validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),1,25) ); } @@ -88,7 +88,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); final long contigStart = selectedContig.getSequenceLength() - 24; final long contigStop = selectedContig.getSequenceLength(); - validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); } protected abstract void validateLocation( GenomeLoc loc ); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyTest.java index ed18bfd9c..1c468ab51 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyTest.java @@ -7,6 +7,7 @@ import static org.junit.Assert.assertTrue; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.BaseTest; 
import net.sf.samtools.SAMFileHeader; @@ -60,7 +61,7 @@ public class IntervalShardStrategyTest extends BaseTest { @Before public void setup() { - GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); mSortedSet = new GenomeLocSortedSet(); } @@ -71,7 +72,7 @@ public class IntervalShardStrategyTest extends BaseTest { @Test public void testSingleChromosomeFunctionality() { - GenomeLoc loc = new GenomeLoc(1, 1, 1000); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000); mSortedSet.add(loc); IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet); int counter = 0; @@ -87,7 +88,7 @@ public class IntervalShardStrategyTest extends BaseTest { @Test public void testMultipleChromosomeFunctionality() { for (int x = 0; x < 5; x++) { - GenomeLoc loc = new GenomeLoc(x, 1, 1000); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000); mSortedSet.add(loc); } IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet); @@ -104,7 +105,7 @@ public class IntervalShardStrategyTest extends BaseTest { @Test public void testOddSizeShardFunctionality() { for (int x = 0; x < 5; x++) { - GenomeLoc loc = new GenomeLoc(x, 1, 1000); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000); mSortedSet.add(loc); } IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet); @@ -122,7 +123,7 @@ public class IntervalShardStrategyTest extends BaseTest { @Test public void testInfiniteShardSize() { for (int x = 0; x < 5; x++) { - GenomeLoc loc = new GenomeLoc(x, 1, 1000); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000); mSortedSet.add(loc); } IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet); @@ -137,7 +138,7 @@ public class IntervalShardStrategyTest extends BaseTest { @Test(expected = UnsupportedOperationException.class) public void testRemove() { - GenomeLoc loc = new GenomeLoc(1, 1, 1000); + GenomeLoc 
loc = GenomeLocParser.createGenomeLoc(1, 1, 1000); mSortedSet.add(loc); IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet); strat.remove(); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardTest.java index 9e2779cff..759436edc 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.junit.Before; import org.junit.Test; @@ -51,20 +52,20 @@ public class IntervalShardTest extends BaseTest { @Before public void setup() { - GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); } @Test public void simpleReturn() { - GenomeLoc loc = new GenomeLoc(1, 1, 100); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100); intervalShard = new IntervalShard(loc); assertTrue(intervalShard.getGenomeLoc().equals(loc)); } @Test public void ensureNotReference() { - GenomeLoc loc = new GenomeLoc(1, 1, 100); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100); intervalShard = new IntervalShard(loc); assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc)); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyTest.java index c1eafd637..bafc4157a 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyTest.java +++ 
b/java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyTest.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.BaseTest; import org.junit.Before; @@ -52,7 +52,7 @@ public class LinearLocusShardStrategyTest extends BaseTest { @Before public void setup() { - GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); } @Test diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryTest.java index 3283f328b..0291c50c3 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryTest.java @@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.junit.*; import static org.junit.Assert.assertTrue; @@ -42,7 +43,7 @@ public class ShardStrategyFactoryTest extends BaseTest { @Before public void setup() { - GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); set = new GenomeLocSortedSet(); } @@ -54,7 +55,7 @@ public class ShardStrategyFactoryTest extends BaseTest { @Test public void testReadInterval() { - GenomeLoc l = new GenomeLoc(0,1,100); + GenomeLoc l = 
GenomeLocParser.createGenomeLoc(0,1,100); set.add(l); ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set); assertTrue(st instanceof IntervalShardStrategy); @@ -74,7 +75,7 @@ public class ShardStrategyFactoryTest extends BaseTest { @Test public void testExpInterval() { - GenomeLoc l = new GenomeLoc(0,1,100); + GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100); set.add(l); ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100,set); assertTrue(st instanceof ExpGrowthLocusShardStrategy); @@ -82,7 +83,7 @@ public class ShardStrategyFactoryTest extends BaseTest { @Test public void testLinearInterval() { - GenomeLoc l = new GenomeLoc(0,1,100); + GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100); set.add(l); ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,header.getSequenceDictionary(),100,set); assertTrue(st instanceof LinearLocusShardStrategy); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java index d078758ae..fc1d93be0 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java @@ -8,6 +8,7 @@ import static org.junit.Assert.assertTrue; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import 
org.broadinstitute.sting.gatk.refdata.TabularROD; @@ -38,13 +39,13 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { private ReferenceOrderedData rod = null; - private final GenomeLoc testSite1 = new GenomeLoc("chrM",10); - private final GenomeLoc testSite2 = new GenomeLoc("chrM",20); - private final GenomeLoc testSite3 = new GenomeLoc("chrM",30); + private final GenomeLoc testSite1 = GenomeLocParser.createGenomeLoc("chrM",10); + private final GenomeLoc testSite2 = GenomeLocParser.createGenomeLoc("chrM",20); + private final GenomeLoc testSite3 = GenomeLocParser.createGenomeLoc("chrM",30); @BeforeClass public static void init() throws FileNotFoundException { - GenomeLoc.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile)); + GenomeLocParser.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile)); TabularROD.setDelimiter(TabularROD.DEFAULT_DELIMITER, TabularROD.DEFAULT_DELIMITER_REGEX); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java index 589ee054a..3cf2a42d0 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java @@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.junit.After; import org.junit.Before; @@ -60,7 +60,7 @@ public class SAMBAMDataSourceTest extends BaseTest { // sequence seq = new FastaSequenceFile2(new 
File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); } /** diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java index c45d5bb1e..ce46969dc 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator; import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator; import static org.junit.Assert.assertEquals; @@ -76,7 +77,7 @@ public class SAMByReadsTest extends BaseTest { @Test public void testToUnmappedReads() { ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000); - GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); try { int unmappedReadsSeen = 0; int iterations = 0; @@ -109,7 +110,7 @@ public class SAMByReadsTest extends BaseTest { @Test public void testShardingOfReadsSize14() { ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000); - GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); targetReadCount = 14; try { int iterations = 0; @@ -159,7 +160,7 @@ public class SAMByReadsTest extends BaseTest { @Test public void 
testShardingOfReadsSize25() { ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000); - GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); targetReadCount = 25; try { int iterations = 0; diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java index da6bce391..548324f70 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java @@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SimpleDataSourceLoadException; import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -73,7 +73,7 @@ public class BoundedReadIteratorTest extends BaseTest { // sequence seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java index fb0cb70f1..1111ea982 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java @@ -8,15 +8,13 @@ import org.junit.*; import static 
org.junit.Assert.assertTrue; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; -import org.broadinstitute.sting.utils.RefHanger; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.File; import java.io.PrintStream; import java.io.FileOutputStream; import java.io.FileNotFoundException; import java.util.Arrays; -import java.util.List; import java.util.ArrayList; /** @@ -33,7 +31,7 @@ public class TabularRODTest extends BaseTest { public static void init() { // sequence seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - GenomeLoc.setupRefContigOrdering(seq); + GenomeLocParser.setupRefContigOrdering(seq); } @Before @@ -50,7 +48,7 @@ public class TabularRODTest extends BaseTest { logger.warn("Executing test1"); TabularROD one = (TabularROD)iter.next(); assertTrue(one.size() == 4); - assertTrue(one.getLocation().equals(new GenomeLoc("chrM", 10))); + assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one.get("COL1").equals("A")); assertTrue(one.get("COL2").equals("B")); assertTrue(one.get("COL3").equals("C")); @@ -62,7 +60,7 @@ public class TabularRODTest extends BaseTest { TabularROD one = (TabularROD)iter.next(); TabularROD two = (TabularROD)iter.next(); assertTrue(two.size() == 4); - assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 20))); + assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20))); assertTrue(two.get("COL1").equals("C")); assertTrue(two.get("COL2").equals("D")); assertTrue(two.get("COL3").equals("E")); @@ -75,7 +73,7 @@ public class TabularRODTest extends BaseTest { TabularROD two = (TabularROD)iter.next(); TabularROD three = (TabularROD)iter.next(); assertTrue(three.size() == 4); - assertTrue(three.getLocation().equals(new GenomeLoc("chrM", 30))); + 
assertTrue(three.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 30))); assertTrue(three.get("COL1").equals("F")); assertTrue(three.get("COL2").equals("G")); assertTrue(three.get("COL3").equals("H")); @@ -93,9 +91,9 @@ public class TabularRODTest extends BaseTest { @Test public void testSeek() { logger.warn("Executing testSeek"); - TabularROD two = (TabularROD)iter.seekForward(new GenomeLoc("chrM", 20)); + TabularROD two = (TabularROD)iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20)); assertTrue(two.size() == 4); - assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 20))); + assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20))); assertTrue(two.get("COL1").equals("C")); assertTrue(two.get("COL2").equals("D")); assertTrue(two.get("COL3").equals("E")); @@ -118,7 +116,7 @@ public class TabularRODTest extends BaseTest { logger.warn("Executing testDelim1"); TabularROD one2 = (TabularROD)iter_commas.next(); assertTrue(one2.size() == 5); - assertTrue(one2.getLocation().equals(new GenomeLoc("chrM", 10))); + assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one2.get("COL1").equals("A")); assertTrue(one2.get("COL2").equals("B")); assertTrue(one2.get("COL3").equals("C")); @@ -135,7 +133,7 @@ public class TabularRODTest extends BaseTest { logger.warn("Executing testDelim1"); TabularROD one2 = (TabularROD)iter_commas.next(); assertTrue(one2.size() == 5); - assertTrue(one2.getLocation().equals(new GenomeLoc("chrM", 10))); + assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one2.get("COL1").equals("A")); assertTrue(one2.get("COL2").equals("B")); assertTrue(one2.get("COL3").equals("C")); @@ -148,7 +146,7 @@ public class TabularRODTest extends BaseTest { ArrayList header = new ArrayList(Arrays.asList("HEADER", "col1", "col2", "col3")); assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3")); String rowData = 
String.format("%d %d %d", 1, 2, 3); - TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" ")); + TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" ")); System.out.println(">>>>> " + row.toString()); assertTrue(row.toString().equals("chrM:1\t1\t2\t3")); } @@ -166,11 +164,11 @@ public class TabularRODTest extends BaseTest { out.println(TabularROD.headerString(header)); String rowData = String.format("%d %d %d", 1, 2, 3); - TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" ")); + TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" ")); out.println(row.toString()); rowData = String.format("%d %d %d", 3, 4, 5); - row = new TabularROD("myName", header, new GenomeLoc("chrM", 2), rowData.split(" ")); + row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 2), rowData.split(" ")); out.println(row.toString()); ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", outputFile, TabularROD.class); @@ -178,14 +176,14 @@ public class TabularRODTest extends BaseTest { TabularROD one = (TabularROD)iter_commas.next(); assertTrue(one.size() == 4); - assertTrue(one.getLocation().equals(new GenomeLoc("chrM", 1))); + assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 1))); assertTrue(one.get("col1").equals("1")); assertTrue(one.get("col2").equals("2")); assertTrue(one.get("col3").equals("3")); TabularROD two = (TabularROD)iter_commas.next(); assertTrue(two.size() == 4); - assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 2))); + assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 2))); assertTrue(two.get("col1").equals("3")); assertTrue(two.get("col2").equals("4")); assertTrue(two.get("col3").equals("5")); @@ -195,14 +193,14 @@ public class TabularRODTest extends BaseTest { public void 
testBadHeader1() { logger.warn("Executing testBadHeader1"); ArrayList header = new ArrayList(); - TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1)); + TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1)); } @Test (expected=RuntimeException.class ) public void testBadHeader2() { logger.warn("Executing testBadHeader2"); ArrayList header = new ArrayList(Arrays.asList("col1", "col2", "col3")); - TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1)); + TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1)); } @Test (expected=RuntimeException.class ) @@ -211,6 +209,6 @@ public class TabularRODTest extends BaseTest { ArrayList header = new ArrayList(Arrays.asList("HEADER", "col1", "col2", "col3")); assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3")); String rowData = String.format("%d %d %d %d", 1, 2, 3, 4); - TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" ")); + TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" ")); } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index aaaa89c94..5f8f2c1c6 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.traversals; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; @@ -10,8 +11,7 @@ import 
org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import static org.junit.Assert.fail; @@ -116,7 +116,7 @@ public class TraverseReadsTest extends BaseTest { catch (FileNotFoundException ex) { throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex); } - GenomeLoc.setupRefContigOrdering(ref); + GenomeLocParser.setupRefContigOrdering(ref); ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, ref.getSequenceDictionary(), @@ -148,7 +148,7 @@ public class TraverseReadsTest extends BaseTest { fail("Count read walker should return an interger."); } if (((Integer) accumulator) != 9721) { - fail("there should be 9721 mapped reads in the index file"); + fail("there should be 9721 mapped reads in the index file, there was " + ((Integer) accumulator) ); } } @@ -156,7 +156,6 @@ public class TraverseReadsTest extends BaseTest { /** Test out that we can shard the file and iterate over every read */ @Test public void testUnmappedReadCount() { - IndexedFastaSequenceFile ref = null; try { ref = new IndexedFastaSequenceFile(refFile); @@ -164,7 +163,7 @@ public class TraverseReadsTest extends BaseTest { catch (FileNotFoundException ex) { throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex); } - GenomeLoc.setupRefContigOrdering(ref); + GenomeLocParser.setupRefContigOrdering(ref); ShardStrategy shardStrategy = 
ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, ref.getSequenceDictionary(), @@ -195,7 +194,7 @@ public class TraverseReadsTest extends BaseTest { fail("Count read walker should return an interger."); } if (((Integer) accumulator) != 10000) { - fail("there should be 10000 mapped reads in the index file"); + fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator)); } } diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java index 0a3129a8e..4ec88c906 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.indels; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.OutputTracker; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMFileReader; import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; @@ -47,7 +47,7 @@ public class CleanedReadInjectorTest extends BaseTest { @BeforeClass public static void initialize() throws FileNotFoundException { sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") ); - GenomeLoc.setupRefContigOrdering(sequenceFile); + GenomeLocParser.setupRefContigOrdering(sequenceFile); } @Test diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java 
b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java new file mode 100644 index 000000000..2cdfd9ad7 --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java @@ -0,0 +1,126 @@ +package org.broadinstitute.sting.utils; + +import static junit.framework.Assert.assertTrue; +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import static org.junit.Assert.assertEquals; +import org.junit.Test; + + +/** + * @author aaron + *

+ * Class GenomeLocParserTest + *

+ * Test out the functionality of the new genome loc parser + */ +public class GenomeLocParserTest extends BaseTest { + + @Test(expected = StingException.class) + public void testUnsetupException() { + GenomeLocParser.createGenomeLoc(0, 0, 0); + } + + @Test + public void testKnownContigOrder() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); + // assert that it's false when the contig ordering is not setup + assertTrue(!GenomeLocParser.hasKnownContigOrdering()); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + // assert that it's true when it is setup + assertTrue(GenomeLocParser.hasKnownContigOrdering()); + } + + @Test(expected = RuntimeException.class) + public void testGetContigIndex() { + assertEquals(-1, GenomeLocParser.getContigIndex("blah")); // should be in the reference + } + + @Test + public void testGetContigIndexValid() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); + assertEquals(0, GenomeLocParser.getContigIndex("chr1")); // should be in the reference + } + + @Test + public void testGetContigInfoUnknownContig() { + assertEquals(null, GenomeLocParser.getContigInfo("blah")); // should be in the reference + } + + + @Test + public void testGetContigInfoKnownContig() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); + assertEquals("chr1".compareTo(GenomeLocParser.getContigInfo("chr1").getSequenceName()), 0); // should be in the reference + } + + @Test(expected = StingException.class) + public void testParseBadString() { + GenomeLocParser.parseGenomeLoc("Bad:0-1"); + } + + @Test + public void testParseGoodString() { + GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-100"); + assertEquals(loc.getContigIndex(), 0); + assertEquals(100, loc.getStop()); + assertEquals(1, loc.getStart()); + } + + @Test(expected = RuntimeException.class) + public void testParseBadLocations() { + 
GenomeLocParser.parseGenomeLocs("chr1:1-1;badChr:1-0"); + } + + @Test + public void testParseGoodLocations() { + GenomeLocParser.parseGenomeLocs("chr1:1-1;chr1:5-9"); + } + + @Test(expected = RuntimeException.class) + public void testParseGoodLocationsTooManySemiColons() { + GenomeLocParser.parseGenomeLocs("chr1:1-1;;chr1:5-9;"); + } + + @Test + public void testCreateGenomeLoc1() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 100); + assertEquals(loc.getContigIndex(), 0); + assertEquals(100, loc.getStop()); + assertEquals(1, loc.getStart()); + } + + @Test + public void testCreateGenomeLoc2() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100); + assertEquals(loc.getContigIndex(), 0); + assertEquals(100, loc.getStop()); + assertEquals(1, loc.getStart()); + } + + @Test + public void testCreateGenomeLoc3() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1); + assertEquals(loc.getContigIndex(), 0); + assertEquals(1, loc.getStop()); + assertEquals(1, loc.getStart()); + } + + @Test + public void testCreateGenomeLoc4() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1); + assertEquals(loc.getContigIndex(), 0); + assertEquals(1, loc.getStop()); + assertEquals(1, loc.getStart()); + } + + @Test + public void testCreateGenomeLoc5() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,1,100); + GenomeLoc copy = GenomeLocParser.createGenomeLoc(loc); + assertEquals(copy.getContigIndex(), 0); + assertEquals(100, copy.getStop()); + assertEquals(1, copy .getStart()); + } +} diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java index 79d591673..85bd804db 100755 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java @@ -44,13 +44,13 @@ public class GenomeLocSortedSetTest extends BaseTest { @Before public void setup() { - 
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); mSortedSet = new GenomeLocSortedSet(); } @Test public void testAdd() { - GenomeLoc g = new GenomeLoc(1, 0, 0); + GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -59,7 +59,7 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test public void testRemove() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = new GenomeLoc(1, 0, 0); + GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.remove(g); @@ -69,9 +69,9 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test public void addRegion() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = new GenomeLoc(1, 1, 50); + GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 1, 50); mSortedSet.add(g); - GenomeLoc f = new GenomeLoc(1, 30, 80); + GenomeLoc f = GenomeLocParser.createGenomeLoc(1, 30, 80); mSortedSet.addRegion(f); assertTrue(mSortedSet.size() == 1); @@ -81,7 +81,7 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test(expected = IllegalArgumentException.class) public void testAddDupplicate() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = new GenomeLoc(1, 0, 0); + GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.add(g); @@ -89,8 +89,8 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test public void mergingOverlappingBelow() { - GenomeLoc g = new GenomeLoc(1, 0, 50); - GenomeLoc e = new GenomeLoc(1, 49, 100); + GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 50); + GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -105,8 +105,8 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test 
public void mergingOverlappingAbove() { - GenomeLoc e = new GenomeLoc(1, 0, 50); - GenomeLoc g = new GenomeLoc(1, 49, 100); + GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50); + GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -121,13 +121,13 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test public void deleteSubRegion() { - GenomeLoc e = new GenomeLoc(1, 0, 50); - GenomeLoc g = new GenomeLoc(1, 49, 100); + GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50); + GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100); mSortedSet.add(g); mSortedSet.addRegion(e); // now delete a region - GenomeLoc d = new GenomeLoc(1, 25, 75); + GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 25, 75); mSortedSet.removeRegion(d); Iterator iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); @@ -143,20 +143,20 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test public void deleteAllByRegion() { - GenomeLoc e = new GenomeLoc(1, 1, 100); + GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100); mSortedSet.add(e); for (int x = 1; x < 101; x++) { - GenomeLoc del = new GenomeLoc(1,x,x); + GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x); mSortedSet.removeRegion(del); } assertTrue(mSortedSet.isEmpty()); } @Test public void deleteSomeByRegion() { - GenomeLoc e = new GenomeLoc(1, 1, 100); + GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100); mSortedSet.add(e); for (int x = 1; x < 50; x++) { - GenomeLoc del = new GenomeLoc(1,x,x); + GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x); mSortedSet.removeRegion(del); } assertTrue(!mSortedSet.isEmpty()); @@ -169,13 +169,13 @@ public class GenomeLocSortedSetTest extends BaseTest { @Test public void deleteSuperRegion() { - GenomeLoc e = new GenomeLoc(1, 10, 20); - GenomeLoc g = new GenomeLoc(1, 70, 100); + GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 10, 20); + GenomeLoc g = 
GenomeLocParser.createGenomeLoc(1, 70, 100); mSortedSet.add(g); mSortedSet.addRegion(e); assertTrue(mSortedSet.size() == 2); // now delete a region - GenomeLoc d = new GenomeLoc(1, 15, 75); + GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 15, 75); mSortedSet.removeRegion(d); Iterator iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java index d056c6897..dd15804bb 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java @@ -22,7 +22,7 @@ public class GenomeLocTest extends BaseTest { public static void init() { // sequence seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - GenomeLoc.setupRefContigOrdering(seq); + GenomeLocParser.setupRefContigOrdering(seq); } /** @@ -32,10 +32,10 @@ public class GenomeLocTest extends BaseTest { public void testIsBetween() { logger.warn("Executing testIsBetween"); - GenomeLoc locMiddle = new GenomeLoc("chr1", 3, 3); + GenomeLoc locMiddle = GenomeLocParser.createGenomeLoc("chr1", 3, 3); - GenomeLoc locLeft = new GenomeLoc("chr1", 1, 1); - GenomeLoc locRight = new GenomeLoc("chr1", 5, 5); + GenomeLoc locLeft = GenomeLocParser.createGenomeLoc("chr1", 1, 1); + GenomeLoc locRight = GenomeLocParser.createGenomeLoc("chr1", 5, 5); Assert.assertTrue(locMiddle.isBetween(locLeft, locRight)); Assert.assertFalse(locLeft.isBetween(locMiddle, locRight)); @@ -45,15 +45,15 @@ public class GenomeLocTest extends BaseTest { @Test public void testContigIndex() { logger.warn("Executing testContigIndex"); - GenomeLoc locOne = new GenomeLoc("chr1",1,1); + GenomeLoc locOne = GenomeLocParser.createGenomeLoc("chr1",1,1); Assert.assertEquals(locOne.getContigIndex(), 1); Assert.assertEquals(locOne.getContig(), "chr1"); - GenomeLoc locX = new GenomeLoc("chrX",1,1); + 
GenomeLoc locX = GenomeLocParser.createGenomeLoc("chrX",1,1); Assert.assertEquals(locX.getContigIndex(), 23); Assert.assertEquals(locX.getContig(), "chrX"); - GenomeLoc locNumber = new GenomeLoc(1,1,1); + GenomeLoc locNumber = GenomeLocParser.createGenomeLoc(1,1,1); Assert.assertEquals(locNumber.getContigIndex(), 1); Assert.assertEquals(locNumber.getContig(), "chr1"); Assert.assertEquals(locOne.compareTo(locNumber), 0); @@ -63,15 +63,15 @@ public class GenomeLocTest extends BaseTest { @Test public void testCompareTo() { logger.warn("Executing testCompareTo"); - GenomeLoc twoOne = new GenomeLoc("chr2", 1); - GenomeLoc twoFive = new GenomeLoc("chr2", 5); - GenomeLoc twoOtherFive = new GenomeLoc("chr2", 5); + GenomeLoc twoOne = GenomeLocParser.createGenomeLoc("chr2", 1); + GenomeLoc twoFive = GenomeLocParser.createGenomeLoc("chr2", 5); + GenomeLoc twoOtherFive = GenomeLocParser.createGenomeLoc("chr2", 5); Assert.assertEquals(0, twoFive.compareTo(twoOtherFive)); Assert.assertEquals(-1, twoOne.compareTo(twoFive)); Assert.assertEquals(1, twoFive.compareTo(twoOne)); - GenomeLoc oneOne = new GenomeLoc("chr1", 5); + GenomeLoc oneOne = GenomeLocParser.createGenomeLoc("chr1", 5); Assert.assertEquals(-1, oneOne.compareTo(twoOne)); Assert.assertEquals(1, twoOne.compareTo(oneOne)); } diff --git a/java/test/org/broadinstitute/sting/utils/RefHangerTest.java b/java/test/org/broadinstitute/sting/utils/RefHangerTest.java index 6c4faf504..cd5559eab 100755 --- a/java/test/org/broadinstitute/sting/utils/RefHangerTest.java +++ b/java/test/org/broadinstitute/sting/utils/RefHangerTest.java @@ -4,11 +4,12 @@ package org.broadinstitute.sting.utils; // the imports for unit testing. 
-import org.junit.*; -import static org.junit.Assert.assertTrue; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; -import org.broadinstitute.sting.utils.RefHanger; +import static org.junit.Assert.assertTrue; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; import java.io.File; import java.util.Arrays; @@ -69,7 +70,7 @@ public class RefHangerTest extends BaseTest { public static void init() { // sequence seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - GenomeLoc.setupRefContigOrdering(seq); + GenomeLocParser.setupRefContigOrdering(seq); System.out.printf("Filled hanger is %n%s%n", makeFilledHanger()); } @@ -81,7 +82,7 @@ public class RefHangerTest extends BaseTest { l3 = Arrays.asList(6); l4 = Arrays.asList(7, 8); l5 = Arrays.asList(9, 10); - p1 = new GenomeLoc(0, 1, 1); + p1 = GenomeLocParser.createGenomeLoc(0, 1, 1); p2 = new GenomeLoc(p1).nextLoc(); p3 = new GenomeLoc(p2).nextLoc(); p4 = new GenomeLoc(p3).nextLoc(); @@ -94,7 +95,7 @@ public class RefHangerTest extends BaseTest { @Before public void setupHanger() { - startLoc = new GenomeLoc(0, 1, 1); // chrM 1 + startLoc = GenomeLocParser.createGenomeLoc(0, 1, 1); // chrM 1 emptyHanger = new RefHanger(); filledHanger = makeFilledHanger(); @@ -145,7 +146,7 @@ public class RefHangerTest extends BaseTest { assertTrue(filledHanger.hasLocation(p3)); assertTrue(filledHanger.hasLocation(p4)); assertTrue(filledHanger.hasLocation(p5)); - assertTrue(! filledHanger.hasLocation(new GenomeLoc(0, 6, 6))); + assertTrue(! filledHanger.hasLocation(GenomeLocParser.createGenomeLoc(0, 6, 6))); assertTrue(filledHanger.getHanger(0) != null); assertTrue(filledHanger.getHanger(1) != null);